diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index beca99c10ec72..a8c83113854c9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4563,8 +4563,10 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) { /// way through the source. static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget) { - // We need to be able to widen elements to the next larger integer type. - if (VT.getScalarSizeInBits() >= Subtarget.getELen()) + // We need to be able to widen elements to the next larger integer type or + // use the zip2a instruction at e64. + if (VT.getScalarSizeInBits() >= Subtarget.getELen() && + !Subtarget.hasVendorXRivosVizip()) return false; int Size = Mask.size(); @@ -4621,6 +4623,48 @@ static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo, SrcInfo[1].second - SrcInfo[0].second == (int)NumElts; } +static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo, + ArrayRef<int> Mask, bool RequiredPolarity) { + int NumElts = Mask.size(); + for (unsigned i = 0; i != NumElts; ++i) { + int M = Mask[i]; + if (M < 0) + continue; + int Src = M >= NumElts; + int Diff = (int)i - (M % NumElts); + bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second; + assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) && + "Must match exactly one of the two slides"); + if (RequiredPolarity != (C == i % 2)) + return false; + } + return true; +} + +/// Given a shuffle which can be represented as a pair of slides, +/// see if it is a zipeven idiom. Zipeven is: +/// vs2: a0 a1 a2 a3 +/// vs1: b0 b1 b2 b3 +/// vd: a0 b0 a2 b2 +static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo, + ArrayRef<int> Mask) { + return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 && + isAlternating(SrcInfo, Mask, true); +} + +/// Given a shuffle which can be represented as a pair of slides, +/// see if it is a zipodd idiom. Zipodd is: +/// vs2: a0 a1 a2 a3 +/// vs1: b0 b1 b2 b3 +/// vd: a1 b1 a3 b3 +/// Note that the operand order is swapped due to the way we canonicalize +/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2. +static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo, + ArrayRef<int> Mask) { + return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 && + isAlternating(SrcInfo, Mask, false); +} + // Lower a deinterleave shuffle to SRL and TRUNC. Factor must be // 2, 4, 8 and the integer type Factor-times larger than VT's // element type must be a legal element type.
@@ -4880,6 +4924,34 @@ static bool isSpreadMask(ArrayRef Mask, unsigned Factor, unsigned &Index) { return true; } +static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, + const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc || + RISCVISD::RI_VZIP2A_VL == Opc); + assert(Op0.getSimpleValueType() == Op1.getSimpleValueType()); + + MVT VT = Op0.getSimpleValueType(); + MVT IntVT = VT.changeVectorElementTypeToInteger(); + Op0 = DAG.getBitcast(IntVT, Op0); + Op1 = DAG.getBitcast(IntVT, Op1); + + MVT ContainerVT = IntVT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget); + Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + } + + auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget); + SDValue Passthru = DAG.getUNDEF(ContainerVT); + SDValue Res = DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL); + if (IntVT.isFixedLengthVector()) + Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget); + Res = DAG.getBitcast(VT, Res); + return Res; +} + // Given a vector a, b, c, d return a vector Factor times longer // with Factor-1 undef's between elements. Ex: // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0) @@ -5619,6 +5691,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, DAG.getVectorIdxConstant(OddSrc % Size, DL)); } + // Prefer vzip2a if available. + // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow. + if (Subtarget.hasVendorXRivosVizip()) { + EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), + EvenV, DAG.getVectorIdxConstant(0, DL)); + OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV, + DAG.getVectorIdxConstant(0, DL)); + return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget); + } return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); } @@ -5670,6 +5751,18 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, Res, DAG, Subtarget); } + if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) { + SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2; + SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2; + return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG, + Subtarget); + } + if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) { + SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2; + SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2; + return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget); + } + // Build the mask. Note that vslideup unconditionally preserves elements // below the slide amount in the destination, and thus those elements are // undefined in the mask. 
If the mask ends up all true (or undef), it @@ -6733,7 +6826,7 @@ static bool hasPassthruOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) @@ -6757,12 +6850,13 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) return true; - if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) + if (Opcode >= RISCVISD::VRGATHER_VX_VL && + Opcode <= RISCVISD::LAST_VL_VECTOR_OP) return true; if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) @@ -21807,6 +21901,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VZEXT_VL) NODE_NAME_CASE(VCPOP_VL) NODE_NAME_CASE(VFIRST_VL) + NODE_NAME_CASE(RI_VZIPEVEN_VL) + NODE_NAME_CASE(RI_VZIPODD_VL) + NODE_NAME_CASE(RI_VZIP2A_VL) NODE_NAME_CASE(READ_CSR) NODE_NAME_CASE(WRITE_CSR) NODE_NAME_CASE(SWAP_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index f4d6cd86397a4..5ebdbbd51f2b1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -403,7 +403,12 @@ enum NodeType : unsigned { // vfirst.m with additional mask and VL operands. 
VFIRST_VL, - LAST_VL_VECTOR_OP = VFIRST_VL, + // XRivosVizip + RI_VZIPEVEN_VL, + RI_VZIPODD_VL, + RI_VZIP2A_VL, + + LAST_VL_VECTOR_OP = RI_VZIP2A_VL, // Read VLENB CSR READ_VLENB, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td index 78c4ed6f00412..3fe50503f937b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td @@ -67,6 +67,38 @@ defm RI_VUNZIP2A_V : VALU_IV_V<"ri.vunzip2a", 0b001000>; defm RI_VUNZIP2B_V : VALU_IV_V<"ri.vunzip2b", 0b011000>; } +// These are modeled after the int binop VL nodes +def ri_vzipeven_vl : SDNode<"RISCVISD::RI_VZIPEVEN_VL", SDT_RISCVIntBinOp_VL>; +def ri_vzipodd_vl : SDNode<"RISCVISD::RI_VZIPODD_VL", SDT_RISCVIntBinOp_VL>; +def ri_vzip2a_vl : SDNode<"RISCVISD::RI_VZIP2A_VL", SDT_RISCVIntBinOp_VL>; + +multiclass RIVPseudoVALU_VV { + foreach m = MxList in + defm "" : VPseudoBinaryV_VV<m>; +} + +let Predicates = [HasVendorXRivosVizip], + Constraints = "@earlyclobber $rd, $rd = $passthru" in { +defm PseudoRI_VZIPEVEN : RIVPseudoVALU_VV; +defm PseudoRI_VZIPODD : RIVPseudoVALU_VV; +defm PseudoRI_VZIP2A : RIVPseudoVALU_VV; +} + +multiclass RIVPatBinaryVL_VV<SDPatternOperator vop, string instruction_name, list<VTypeInfo> vtilist = AllIntegerVectors, + bit isSEWAware = false> { + foreach vti = vtilist in + let Predicates = GetVTypePredicates<vti>.Predicates in + def : VPatBinaryVL_V<vop, instruction_name, "VV", vti.Vector, vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass, vti.RegClass, isSEWAware>; +} + +defm : RIVPatBinaryVL_VV<ri_vzipeven_vl, "PseudoRI_VZIPEVEN">; +defm : RIVPatBinaryVL_VV<ri_vzipodd_vl, "PseudoRI_VZIPODD">; +defm : RIVPatBinaryVL_VV<ri_vzip2a_vl, "PseudoRI_VZIP2A">; + //===----------------------------------------------------------------------===// // XRivosVisni //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll index 6ed288ff011e7..917613d5c786f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 ; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 ; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 +; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV32-ZIP +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV64-ZIP ; Test optimizing interleaves to widening arithmetic.
@@ -15,6 +17,13 @@ define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: vwmaccu.vx v10, a0, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: interleave_v2i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> ret <4 x i8> %a } @@ -28,6 +37,13 @@ define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) { ; CHECK-NEXT: vwmaccu.vx v10, a0, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: interleave_v2i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> ret <4 x i16> %a } @@ -42,6 +58,13 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) { ; CHECK-NEXT: vwmaccu.vx v10, a0, v8 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: interleave_v2i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> ret <4 x i32> %a } @@ -72,6 +95,14 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V512-NEXT: vslideup.vi v11, v8, 1 ; V512-NEXT: vmerge.vvm v8, v11, v10, v0 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v2i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> ret <4 x i64> %a } @@ -95,6 +126,13 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v8 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> ret <8 x i8> %a } @@ -118,6 +156,13 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> ret <8 x i16> %a } @@ -141,6 +186,14 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> ret <8 x i32> %a } @@ -167,6 +220,15 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i32_offset_2: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v9, 2 +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> 
ret <4 x i32> %a } @@ -198,6 +260,17 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma ; V512-NEXT: vmerge.vvm v8, v9, v10, v0 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v4i32_offset_1: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; ZIP-NEXT: vmv.v.i v0, 8 +; ZIP-NEXT: vmv1r.v v10, v9 +; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t +; ZIP-NEXT: vmv.v.i v0, 10 +; ZIP-NEXT: ri.vzip2a.vv v11, v8, v9 +; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %a } @@ -220,6 +293,13 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v8i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> ret <16 x i8> %a } @@ -244,6 +324,14 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v8 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v12, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> ret <16 x i16> %a } @@ -267,6 +355,14 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZIP-NEXT: vmv2r.v v16, v10 +; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> ret <16 x i32> %a } @@ -290,6 +386,16 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v16i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> ret <32 x i8> %a } @@ -313,6 +419,16 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v16i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv2r.v v16, v10 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> ret <32 x i16> %a } @@ -337,6 +453,16 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) { ; V512-NEXT: li a0, -1 ; V512-NEXT: vwmaccu.vx v8, a0, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v16i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv4r.v v24, v12 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24 +; ZIP-NEXT: vmv.v.v v8, v16 +; ZIP-NEXT: ret %a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> ret <32 x 
i32> %a } @@ -363,6 +489,16 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v32i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv2r.v v16, v10 +; ZIP-NEXT: li a0, 64 +; ZIP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> ret <64 x i8> %a } @@ -391,6 +527,16 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) { ; V512-NEXT: li a0, -1 ; V512-NEXT: vwmaccu.vx v8, a0, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v32i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; ZIP-NEXT: vmv4r.v v24, v12 +; ZIP-NEXT: li a0, 64 +; ZIP-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24 +; ZIP-NEXT: vmv.v.v v8, v16 +; ZIP-NEXT: ret %a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> ret <64 x i16> %a } @@ -446,6 +592,78 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V512-NEXT: li a0, -1 ; V512-NEXT: vwmaccu.vx v8, a0, v12 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_v32i32: +; ZIP: # %bb.0: +; ZIP-NEXT: addi sp, sp, -16 +; ZIP-NEXT: .cfi_def_cfa_offset 16 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: slli a0, a0, 5 +; ZIP-NEXT: sub sp, sp, a0 +; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: li a1, 24 +; ZIP-NEXT: mul a0, a0, a1 +; ZIP-NEXT: add a0, sp, a0 +; ZIP-NEXT: addi a0, a0, 16 +; ZIP-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; ZIP-NEXT: vslidedown.vi v24, v8, 16 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v16, v24, v0 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: li a2, 24 +; ZIP-NEXT: mul a1, a1, a2 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; ZIP-NEXT: vslidedown.vi v24, v24, 16 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 4 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: lui a1, 699051 +; ZIP-NEXT: addi a1, a1, -1366 +; ZIP-NEXT: vmv.s.x v0, a1 +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 3 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 4 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; ZIP-NEXT: csrr a1, vlenb +; ZIP-NEXT: slli a1, a1, 3 +; ZIP-NEXT: add a1, sp, a1 +; ZIP-NEXT: addi a1, a1, 16 +; ZIP-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload +; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24, v0.t +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: li a1, 24 +; ZIP-NEXT: mul a0, a0, a1 +; ZIP-NEXT: add a0, sp, a0 +; ZIP-NEXT: addi a0, a0, 16 +; ZIP-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; ZIP-NEXT: addi a0, sp, 16 +; ZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; ZIP-NEXT: ri.vzip2a.vv v0, v8, v24 +; 
ZIP-NEXT: vmv.v.v v8, v0 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: slli a0, a0, 5 +; ZIP-NEXT: add sp, sp, a0 +; ZIP-NEXT: .cfi_def_cfa sp, 16 +; ZIP-NEXT: addi sp, sp, 16 +; ZIP-NEXT: .cfi_def_cfa_offset 0 +; ZIP-NEXT: ret %a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> ret <64 x i32> %a } @@ -471,6 +689,15 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -498,6 +725,17 @@ define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) { ; V512-NEXT: vrgather.vv v9, v8, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i8_invalid: +; ZIP: # %bb.0: +; ZIP-NEXT: lui a0, 16 +; ZIP-NEXT: addi a0, a0, 768 +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vmv.s.x v10, a0 +; ZIP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; ZIP-NEXT: vrgather.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -523,6 +761,15 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> ret <4 x i16> %a } @@ -548,6 +795,15 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> ret <4 x i32> %a } @@ -590,6 +846,15 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) { ; RV64-V512-NEXT: vrgather.vv v9, v8, v10 ; RV64-V512-NEXT: vmv.v.v v8, v9 ; RV64-V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v4i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma +; ZIP-NEXT: vslidedown.vi v12, v8, 2 +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> ret <4 x i64> %a } @@ -615,6 +880,15 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v8i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 4 +; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10 +; ZIP-NEXT: vmv1r.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> ret <8 x i8> %a } @@ -640,6 +914,15 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v8 ; 
V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v10, v8, 4 +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v9, v10, v8 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret %a = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> ret <8 x i16> %a } @@ -665,6 +948,15 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) { ; V512-NEXT: vwmaccu.vx v9, a0, v10 ; V512-NEXT: vmv1r.v v8, v9 ; V512-NEXT: ret +; +; ZIP-LABEL: unary_interleave_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; ZIP-NEXT: vslidedown.vi v12, v8, 4 +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret %a = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> ret <8 x i32> %a } @@ -679,6 +971,14 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) { ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: unary_interleave_10uu_v4i8: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZIP-NEXT: vsrl.vi v9, v8, 8 +; ZIP-NEXT: vsll.vi v8, v8, 8 +; ZIP-NEXT: vor.vv v8, v8, v9 +; ZIP-NEXT: ret %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> ret <4 x i8> %a } @@ -702,6 +1002,14 @@ define <16 x i16> @interleave_slp(<8 x i16> %v0, <8 x i16> %v1) { ; V512-NEXT: vwmaccu.vx v10, a0, v9 ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret +; +; ZIP-LABEL: interleave_slp: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZIP-NEXT: vmv1r.v v12, v9 +; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> %v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32> @@ -711,4 +1019,6 @@ entry: ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; RV32-V128: {{.*}} +; RV32-ZIP: {{.*}} ; RV64-V128: {{.*}} +; RV64-ZIP: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll index c97f11301a05a..0a442940366e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV64 define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: zipeven_v4i32: @@ -9,6 +11,13 @@ define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -22,6 +31,13 @@ define <4 x i32> @zipeven_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_swapped: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v9, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -35,6 +51,13 @@ define <4 x i64> @zipeven_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %c @@ -47,6 +70,13 @@ define <4 x half> @zipeven_v4f16(<4 x half> %a, <4 x half> %b) { ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4f16: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9 +; ZIP-NEXT: vmv1r.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> ret <4 x half> %c @@ -59,6 +89,13 @@ define <4 x float> @zipeven_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4f32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %c @@ -72,6 +109,13 @@ define <4 x double> @zipeven_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: vsetivli zero, 4, 
e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4f64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %c @@ -86,6 +130,13 @@ define <4 x i32> @zipodd_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -98,6 +149,13 @@ define <4 x i32> @zipodd_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vslidedown.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_swapped: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v10, v9, v8 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -110,6 +168,10 @@ define <4 x i32> @zipeven_v4i32_single(<4 x i32> %a) { ; CHECK-LABEL: zipeven_v4i32_single: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_single: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -124,6 +186,12 @@ define <4 x i32> @zipodd_v4i32_single(<4 x i32> %a) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_single: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vslidedown.vi v8, v8, 1 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -136,6 +204,13 @@ define <4 x i32> @zipodd_v4i32_both(<4 x i32> %a) { ; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_both: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v9, v8, v8 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -150,6 +225,13 @@ define <4 x i32> @zipeven_v4i32_both(<4 x i32> %a) { ; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_both: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v9, v8, v8 +; ZIP-NEXT: vmv.v.v v8, v9 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> ret <4 x i32> %c @@ -161,6 +243,12 @@ define <4 x i32> @zipeven_v4i32_partial(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v4i32_partial: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; ZIP-NEXT: vslideup.vi v8, v9, 1 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -174,6 +262,13 @@ define <4 x i32> @zipodd_v4i32_partial(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v4i32_partial: 
+; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9 +; ZIP-NEXT: vmv.v.v v8, v10 +; ZIP-NEXT: ret entry: %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -187,6 +282,13 @@ define <8 x i32> @zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) { ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> ret <8 x i32> %out } @@ -200,6 +302,13 @@ define <8 x i32> @zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) { ; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v8i32: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10 +; ZIP-NEXT: vmv.v.v v8, v12 +; ZIP-NEXT: ret %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> ret <8 x i32> %out } @@ -213,6 +322,13 @@ define <16 x i64> @zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) { ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipeven_v16i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZIP-NEXT: ri.vzipeven.vv v24, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v24 +; ZIP-NEXT: ret %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> ret <16 x i64> %out } @@ -227,9 +343,18 @@ define <16 x i64> @zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) { ; CHECK-NEXT: vslidedown.vi v16, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret +; +; ZIP-LABEL: zipodd_v16i64: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZIP-NEXT: ri.vzipodd.vv v24, v8, v16 +; ZIP-NEXT: vmv.v.v v8, v24 +; ZIP-NEXT: ret %out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> ret <16 x i64> %out } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; RV32: {{.*}} ; RV64: {{.*}} +; ZIP-RV32: {{.*}} +; ZIP-RV64: {{.*}}
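A quick way to try the new lowering outside the full regression files is a minimal standalone .ll test. The sketch below mirrors the zipeven_v4i32 case from the diff above; the explicit shuffle mask is the canonical zipeven pattern described in the isZipEven comment (vd = a0 b0 a2 b2, i.e. indices 0, 4, 2, 6) rather than text copied from the original test, the feature string is trimmed to just +v,+experimental-xrivosvizip, and the CHECK lines are hand-written to look only for the key instruction instead of being autogenerated.

; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s
define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: zipeven_v4i32:
; CHECK: ri.vzipeven.vv
entry:
  ; Select the even elements of %a interleaved with the even elements of %b.
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %c
}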