diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index f721d7148526b..e9c1063ab50a6 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -805,6 +805,13 @@ def FeatureStdExtZvksc def FeatureStdExtZvksg : RISCVExtension<1, 0, "shorthand for 'Zvks' and 'Zvkg'", [FeatureStdExtZvks, FeatureStdExtZvkg]>; + +def FeatureStdExtZvzip + : RISCVExperimentalExtension<0, 1, "zip/unzip/zipeven/zipodd">; +def HasStdExtZvzip : Predicate<"Subtarget->hasStdExtZvzip()">, + AssemblerPredicate<(all_of FeatureStdExtZvzip), + "'Zvzip'">; + // Vector instruction predicates def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f7efd5f437fbb..6d812584ce272 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4528,8 +4528,11 @@ static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, /// way through the source. static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget) { - // We need to be able to widen elements to the next larger integer type. - if (VT.getScalarSizeInBits() >= Subtarget.getELen()) + + // We need to be able to widen elements to the next larger integer type, + // or use zip2a. + if (VT.getScalarSizeInBits() >= Subtarget.getELen() && + !Subtarget.hasStdExtZvzip()) return false; int Size = Mask.size(); @@ -4915,6 +4918,72 @@ static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, return DAG.getBitcast(ResultVT, Result); } +// Note: This is really a lowerBinOpVL function, can we factor that +// into existing upstream code in a useful way? +static SDValue lowerZVZIP(unsigned Opc, SDValue Op0, SDValue Op1, + const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(RISCVISD::VZIPEVEN_VL == Opc || RISCVISD::VZIPODD_VL == Opc || + RISCVISD::VZIP2A_VL == Opc || RISCVISD::VZIP2B_VL == Opc || + RISCVISD::VUNZIP2A_VL == Opc || RISCVISD::VUNZIP2B_VL == Opc); + assert(Op0.getSimpleValueType() == Op1.getSimpleValueType()); + + MVT VT = Op0.getSimpleValueType(); + MVT IntVT = VT.changeVectorElementTypeToInteger(); + Op0 = DAG.getBitcast(IntVT, Op0); + Op1 = DAG.getBitcast(IntVT, Op1); + + MVT ContainerVT = IntVT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget); + Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + } + + const MVT M1VT = getLMUL1VT(ContainerVT); + MVT InnerVT = ContainerVT; + if (ContainerVT.bitsLT(M1VT)) { + // unzip2a and unzip2b must have an undef operand if < m1 + assert(RISCVISD::VUNZIP2A_VL != Opc || RISCVISD::VUNZIP2B_VL != Opc || + Op1.isUndef()); + Op0 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, + DAG.getUNDEF(M1VT), Op0, + DAG.getVectorIdxConstant(0, DL)); + Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, + DAG.getUNDEF(M1VT), Op1, + DAG.getVectorIdxConstant(0, DL)); + InnerVT = M1VT; + } + + // TODO: Maybe this should be a DAG combine? 
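+  // Roughly: for a single-source vunzip2a/vunzip2b whose operand is wider
+  // than LMUL1 (Op1 undef, ContainerVT > M1), extract the low and high
+  // halves of Op0 and unzip those against each other at the halved LMUL.
+  // This is what turns e.g. the m4 single-source deinterleave in the
+  // unzip2a_i64_singlesrc case in zvzip.ll into a single m2 vunzip2a.vv.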
+  if (Op1.isUndef() && ContainerVT.bitsGT(M1VT) &&
+      (RISCVISD::VUNZIP2A_VL == Opc || RISCVISD::VUNZIP2B_VL == Opc)) {
+    InnerVT = ContainerVT.getHalfNumVectorElementsVT();
+    unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
+    Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InnerVT, Op0,
+                      DAG.getVectorIdxConstant(HighIdx, DL));
+    Op0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InnerVT, Op0,
+                      DAG.getVectorIdxConstant(0, DL));
+  }
+
+  auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
+  SDValue Passthru = DAG.getUNDEF(InnerVT);
+  SDValue Res =
+      DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
+  if (InnerVT.bitsGT(ContainerVT))
+    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT,
+                      Res, DAG.getVectorIdxConstant(0, DL));
+  else if (InnerVT.bitsLT(ContainerVT))
+    Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Res,
+                      DAG.getVectorIdxConstant(0, DL));
+  if (IntVT.isFixedLengthVector())
+    Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
+  Res = DAG.getBitcast(VT, Res);
+  return Res;
+}
+
 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
 // to create an interleaved vector of <[vscale x] n*2 x ty>.
 // This requires that the size of ty is less than the subtarget's maximum ELEN.
@@ -5111,6 +5180,46 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
   return DAG.getBitcast(VT, Rotate);
 }
 
+static SDValue lowerVECTOR_SHUFFLEAsZipEvenOdd(const SDLoc &DL, MVT VT,
+                                               SDValue V1, SDValue V2,
+                                               ArrayRef<int> Mask,
+                                               const RISCVSubtarget &Subtarget,
+                                               SelectionDAG &DAG) {
+  // This figures out if the mask is a zip of two stride two access
+  // patterns (one for each input), and what the start of each sequence
+  // is.
+  int Starts[2] = {-1, -1};
+  for (unsigned i = 0; i < Mask.size(); i++) {
+    int E = Mask[i];
+    if (E == -1)
+      continue;
+    if (E >= (int)Mask.size())
+      E -= Mask.size();
+    bool S = i % 2;
+    int Start = E + S - i;
+    if (Starts[S] != Start) {
+      if (Starts[S] != -1)
+        return SDValue();
+      Starts[S] = Start;
+    }
+  }
+
+  auto [A, B] = Starts;
+  if (A != B || (A != 0 && A != 1))
+    return SDValue();
+
+  // Given a mask which is a zipeven or zipodd, which inputs does each
+  // part of the zip use? This allows duplicate sources.
+  bool Sources[2] = {false, false};
+  for (unsigned i = 0; i < Mask.size(); i++) {
+    Sources[i % 2] |= Mask[i] >= (int)Mask.size();
+  }
+  SDValue Src1 = Sources[0] ? V2 : V1;
+  SDValue Src2 = Sources[1] ? V2 : V1;
+  unsigned Opcode = A == 0 ? RISCVISD::VZIPEVEN_VL : RISCVISD::VZIPODD_VL;
+  return lowerZVZIP(Opcode, Src1, Src2, DL, DAG, Subtarget);
+}
+
 // If compiling with an exactly known VLEN, see if we can split a
 // shuffle on m2 or larger into a small number of m1 sized shuffles
 // which write each destination registers exactly once.
@@ -5549,6 +5658,53 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     }
   }
 
+  if (Subtarget.hasStdExtZvzip()) {
+    // If this is a deinterleave(2) - possibly with two distinct sources, and
+    // possibly at e64 - match to the vunzip2a/vunzip2b. Put this after the
+    // vnsrl matching as that's probably still a better canonical form.
+    // Note that we have a problem with the definition of this instruction.
+    // If VL is not a register boundary, taking the first half from the first
+    // source and the second half from the second source is not the same as
+    // treating the pair of registers as a register group with the standard
+    // prefix layout.
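+    // For example (assuming VLEN=128): two <4 x i32> sources each occupy
+    // only the low half of a register, so the element-wise concatenation of
+    // the two sources is not what reading them as a single register group
+    // would give. That is why the two-source handling below requires either
+    // a known register-multiple layout (KnownLayout), an explicit
+    // CONCAT_VECTORS of the sources, or unzipping each source separately
+    // and concatenating the results.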
+ unsigned Index = 0; + if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) && + 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) { + const unsigned EltSize = VT.getScalarSizeInBits(); + const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize; + bool KnownLayout = false; + if (auto VLEN = Subtarget.getRealVLen()) + KnownLayout = VT.getSizeInBits().getKnownMinValue() % *VLEN == 0; + unsigned Opc = Index == 0 ? + RISCVISD::VUNZIP2A_VL : RISCVISD::VUNZIP2B_VL; + if (V2.isUndef() || KnownLayout) { + return lowerZVZIP(Opc, V1, V2, DL, DAG, Subtarget); + } else if (NumElts < MinVLMAX) { + MVT ConcatVT = VT.getDoubleNumVectorElementsVT(); + V1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2); + V2 = DAG.getUNDEF(ConcatVT); + SDValue Res = lowerZVZIP(Opc, V1, V2, DL, DAG, Subtarget); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getVectorIdxConstant(0, DL)); + } else { + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + + V1 = lowerZVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget); + V2 = lowerZVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget); + + V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, + DAG.getVectorIdxConstant(0, DL)); + V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2, + DAG.getVectorIdxConstant(0, DL)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V1, V2); + } + } + + // Try to match a zipeven or zipodd + if (SDValue V = lowerVECTOR_SHUFFLEAsZipEvenOdd(DL, VT, V1, V2, Mask, + Subtarget, DAG)) + return V; + } + if (SDValue V = lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; @@ -5587,6 +5743,18 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, DAG.getVectorIdxConstant(OddSrc % Size, DL)); } + // Prefer vzip2a if available. + // FIXME: prefer the spread idioms? And figure out what the equivalent are + // for the vzip2a cases to avoid undef issues? 
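+  // Sketch of the vzip2a lowering below: EvenV and OddV are the half-width
+  // extracts from above, so e.g. EvenV = <e0, e1, ...>, OddV = <o0, o1, ...>
+  // become <e0, o0, e1, o1, ...> with a single vzip2a, instead of the
+  // vwaddu.vv/vwmaccu.vx widening sequence (which cannot handle ELEN-sized
+  // elements). The insert_subvector into an undef VT vector just pads the
+  // operands back out to VT; vzip2a only needs the low half of each source.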
+ if (Subtarget.hasStdExtZvzip()) { + EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), + EvenV, DAG.getVectorIdxConstant(0, DL)); + OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), + OddV, DAG.getVectorIdxConstant(0, DL)); + return lowerZVZIP(RISCVISD::VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget); + + } + assert(VT.getScalarSizeInBits() < Subtarget.getELen()); return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); } @@ -6502,7 +6670,7 @@ static bool hasPassthruOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) @@ -6526,7 +6694,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) @@ -21020,6 +21188,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VZEXT_VL) NODE_NAME_CASE(VCPOP_VL) NODE_NAME_CASE(VFIRST_VL) + NODE_NAME_CASE(VZIPEVEN_VL) + NODE_NAME_CASE(VZIPODD_VL) + NODE_NAME_CASE(VZIP2A_VL) + NODE_NAME_CASE(VZIP2B_VL) + NODE_NAME_CASE(VUNZIP2A_VL) + NODE_NAME_CASE(VUNZIP2B_VL) NODE_NAME_CASE(READ_CSR) NODE_NAME_CASE(WRITE_CSR) NODE_NAME_CASE(SWAP_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 21747cc353203..4de2ab6ee6426 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -402,7 +402,16 @@ enum NodeType : unsigned { // vfirst.m with additional mask and VL operands. VFIRST_VL, - LAST_VL_VECTOR_OP = VFIRST_VL, + // Zvzip -- note that these are binary ops (like add), which creates + // some semantic oddness for unzip2a/b. + VZIPEVEN_VL, + VZIPODD_VL, + VZIP2A_VL, + VZIP2B_VL, + VUNZIP2A_VL, + VUNZIP2B_VL, + + LAST_VL_VECTOR_OP = VUNZIP2B_VL, // Read VLENB CSR READ_VLENB, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 24a881dc6810f..fc20b733bad68 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1814,5 +1814,61 @@ let Predicates = [HasVInstructionsI64, IsRV64] in { } } // Predicates = [HasVInstructionsI64, IsRV64] + +// The experimental zvzip extension. Note these opcodes are very likely to +// change! 
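+// Rough semantics, as exercised by the CodeGen tests in this patch
+// (vd, vs2, vs1 operand order; subject to change along with the encodings):
+//   vzipeven.vv : vd[2i] = vs2[2i],   vd[2i+1] = vs1[2i]
+//   vzipodd.vv  : vd[2i] = vs2[2i+1], vd[2i+1] = vs1[2i+1]
+//   vzip2a.vv   : vd[2i] = vs2[i],    vd[2i+1] = vs1[i]
+//   vzip2b.vv   : presumably the high-half counterpart of vzip2a; nothing
+//                 selects it yet in this patch.
+//   vunzip2a.vv : vd[i] = concat(vs2, vs1)[2i]   (even elements)
+//   vunzip2b.vv : vd[i] = concat(vs2, vs1)[2i+1] (odd elements)
+// Exactly how the vs2/vs1 concatenation behaves when VL is not a register
+// boundary is the open question noted in RISCVISelLowering.cpp.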
+let Predicates = [HasStdExtZvzip], hasSideEffects = 0, mayLoad = 0, + mayStore = 0, isCodeGenOnly = 0, Inst<6-0> = OPC_CUSTOM_2.Value in { +defm VZIPEVEN_V : VALU_IV_V<"vzipeven", 0b001100>; +defm VZIPODD_V : VALU_IV_V<"vzipodd", 0b011100>; +defm VZIP2A_V : VALU_IV_V<"vzip2a", 0b000100>; +defm VZIP2B_V : VALU_IV_V<"vzip2b", 0b010100>; +defm VUNZIP2A_V : VALU_IV_V<"vunzip2a", 0b001000>; +defm VUNZIP2B_V : VALU_IV_V<"vunzip2b", 0b011000>; +} + include "RISCVInstrInfoZvfbf.td" include "RISCVInstrInfoVPseudos.td" + + +// These are modeled after the int binop VL nodes +def riscv_vzipeven_vl : SDNode<"RISCVISD::VZIPEVEN_VL", SDT_RISCVIntBinOp_VL>; +def riscv_vzipodd_vl : SDNode<"RISCVISD::VZIPODD_VL", SDT_RISCVIntBinOp_VL>; +def riscv_vzip2a_vl : SDNode<"RISCVISD::VZIP2A_VL", SDT_RISCVIntBinOp_VL>; +def riscv_vunzip2a_vl : SDNode<"RISCVISD::VUNZIP2A_VL", SDT_RISCVIntBinOp_VL>; +def riscv_vunzip2b_vl : SDNode<"RISCVISD::VUNZIP2B_VL", SDT_RISCVIntBinOp_VL>; + +multiclass VPseudoVALU_VV { + foreach m = MxList in { + defvar mx = m.MX; + defm "" : VPseudoBinaryV_VV, + SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", mx, + forcePassthruRead=true>; + } +} + +multiclass VPatBinaryVL_VV vtilist = AllIntegerVectors, + bit isSEWAware = 0> { + foreach vti = vtilist in + let Predicates = GetVTypePredicates.Predicates in + def : VPatBinaryVL_V; +} + +let Predicates = [HasStdExtZvzip], mayLoad = 0, mayStore = 0, + hasSideEffects = 0 in { +defm PseudoVZIPEVEN : VPseudoVALU_VV; +defm PseudoVZIPODD : VPseudoVALU_VV; +defm PseudoVZIP2A : VPseudoVALU_VV; +defm PseudoVUNZIP2A : VPseudoVALU_VV; +defm PseudoVUNZIP2B : VPseudoVALU_VV; +} + +defm : VPatBinaryVL_VV; +defm : VPatBinaryVL_VV; +defm : VPatBinaryVL_VV; +defm : VPatBinaryVL_VV; +defm : VPatBinaryVL_VV; diff --git a/llvm/test/CodeGen/RISCV/rvv/zvzip.ll b/llvm/test/CodeGen/RISCV/rvv/zvzip.ll new file mode 100644 index 0000000000000..176279b68476f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/zvzip.ll @@ -0,0 +1,893 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZIP +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvzip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZIP + +define <4 x i32> @zipeven(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zipeven: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vslideup.vi v8, v9, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipeven_swapped(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zipeven_swapped: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vslideup.vi v9, v8, 1, v0.t +; NOZIP-NEXT: vmv.v.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_swapped: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v9, v8 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @zipeven_i64(<4 x i64> %a, <4 x i64> %b) { +; NOZIP-LABEL: zipeven_i64: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; NOZIP-NEXT: vmv.v.i v0, 10 +; 
NOZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; NOZIP-NEXT: vslideup.vi v8, v10, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v10 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +define <4 x half> @zipeven_half(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: zipeven_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lh a3, 0(a1) +; CHECK-NEXT: lh a4, 0(a2) +; CHECK-NEXT: lh a1, 16(a1) +; CHECK-NEXT: lh a2, 16(a2) +; CHECK-NEXT: sh a3, 0(a0) +; CHECK-NEXT: sh a4, 2(a0) +; CHECK-NEXT: sh a1, 4(a0) +; CHECK-NEXT: sh a2, 6(a0) +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> + ret <4 x half> %c +} + +define <4 x float> @zipeven_float(<4 x float> %a, <4 x float> %b) { +; NOZIP-LABEL: zipeven_float: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vslideup.vi v8, v9, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_float: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +define <4 x double> @zipeven_double(<4 x double> %a, <4 x double> %b) { +; NOZIP-LABEL: zipeven_double: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; NOZIP-NEXT: vslideup.vi v8, v10, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_double: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v10 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} + + +define <4 x i32> @zipodd(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zipodd: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 5 +; NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; NOZIP-NEXT: vmv.v.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipodd: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipodd.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipodd_swapped(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zipodd_swapped: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 5 +; NOZIP-NEXT: vslidedown.vi v8, v9, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipodd_swapped: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipodd.vv v8, v9, v8 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipeven_single(<4 x i32> %a) { +; CHECK-LABEL: zipeven_single: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipodd_single(<4 x i32> %a) { +; CHECK-LABEL: zipodd_single: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipodd_both(<4 x i32> %a) { +; 
NOZIP-LABEL: zipodd_both: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: lui a0, 12336 +; NOZIP-NEXT: addi a0, a0, 257 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vmv.s.x v9, a0 +; NOZIP-NEXT: vsext.vf4 v10, v9 +; NOZIP-NEXT: vrgather.vv v9, v8, v10 +; NOZIP-NEXT: vmv.v.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipodd_both: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipodd.vv v8, v8, v8 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipeven_both(<4 x i32> %a) { +; NOZIP-LABEL: zipeven_both: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: lui a0, 8224 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vmv.s.x v9, a0 +; NOZIP-NEXT: vsext.vf4 v10, v9 +; NOZIP-NEXT: vrgather.vv v9, v8, v10 +; NOZIP-NEXT: vmv.v.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_both: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v8 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipeven_partial(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zipeven_partial: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; NOZIP-NEXT: vslideup.vi v8, v9, 1 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_partial: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i32> @zipodd_partial(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zipodd_partial: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 13 +; NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; NOZIP-NEXT: vmv.v.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipodd_partial: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipodd.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + + +define <4 x i32> @zip2a_i32(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: zip2a_i32: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vwaddu.vv v10, v8, v9 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vwmaccu.vx v10, a0, v9 +; NOZIP-NEXT: vmv1r.v v8, v10 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zip2a_i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzip2a.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @zip2a_i64(<4 x i64> %a, <4 x i64> %b) { +; NOZIP-LABEL: zip2a_i64: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; NOZIP-NEXT: vid.v v12 +; NOZIP-NEXT: vsrl.vi v14, v12, 1 +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; NOZIP-NEXT: vrgatherei16.vv v12, v8, v14 +; NOZIP-NEXT: vrgatherei16.vv v12, v10, v14, v0.t +; NOZIP-NEXT: vmv.v.v v8, v12 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zip2a_i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vzip2a.vv v8, v8, v10 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +define <4 x half> @zip2a_half(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: zip2a_half: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: lh a3, 0(a1) +; CHECK-NEXT: lh a4, 0(a2) +; CHECK-NEXT: lh a1, 8(a1) +; CHECK-NEXT: lh a2, 8(a2) +; CHECK-NEXT: sh a3, 0(a0) +; CHECK-NEXT: sh a4, 2(a0) +; CHECK-NEXT: sh a1, 4(a0) +; CHECK-NEXT: sh a2, 6(a0) +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> + ret <4 x half> %c +} + +define <4 x float> @zip2a_float(<4 x float> %a, <4 x float> %b) { +; NOZIP-LABEL: zip2a_float: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vwaddu.vv v10, v8, v9 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vwmaccu.vx v10, a0, v9 +; NOZIP-NEXT: vmv1r.v v8, v10 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zip2a_float: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzip2a.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +define <4 x double> @zip2a_double(<4 x double> %a, <4 x double> %b) { +; NOZIP-LABEL: zip2a_double: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; NOZIP-NEXT: vid.v v12 +; NOZIP-NEXT: vsrl.vi v14, v12, 1 +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; NOZIP-NEXT: vrgatherei16.vv v12, v8, v14 +; NOZIP-NEXT: vrgatherei16.vv v12, v10, v14, v0.t +; NOZIP-NEXT: vmv.v.v v8, v12 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zip2a_double: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vzip2a.vv v8, v8, v10 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} + + +define <8 x i64> @dual_interleave(<4 x i64> %a, <4 x i64> %b) { +; NOZIP-LABEL: dual_interleave: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; NOZIP-NEXT: vmv2r.v v16, v10 +; NOZIP-NEXT: vid.v v10 +; NOZIP-NEXT: lui a0, %hi(.LCPI19_0) +; NOZIP-NEXT: addi a0, a0, %lo(.LCPI19_0) +; NOZIP-NEXT: vsrl.vi v18, v10, 1 +; NOZIP-NEXT: vle16.v v20, (a0) +; NOZIP-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; NOZIP-NEXT: vrgatherei16.vv v12, v8, v18 +; NOZIP-NEXT: li a0, 170 +; NOZIP-NEXT: vmv.s.x v0, a0 +; NOZIP-NEXT: vrgatherei16.vv v12, v16, v20, v0.t +; NOZIP-NEXT: vmv.v.v v8, v12 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: dual_interleave: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; ZIP-NEXT: vmv2r.v v12, v10 +; ZIP-NEXT: vzip2a.vv v8, v8, v8 +; ZIP-NEXT: lui a0, %hi(.LCPI19_0) +; ZIP-NEXT: addi a0, a0, %lo(.LCPI19_0) +; ZIP-NEXT: vle16.v v16, (a0) +; ZIP-NEXT: li a0, 170 +; ZIP-NEXT: vmv.s.x v0, a0 +; ZIP-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> + ret <8 x i64> %c +} + +define <4 x i32> @unzip2a_i32(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: unzip2a_i32: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vid.v v10 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vadd.vv v10, v10, v10 +; NOZIP-NEXT: vadd.vi v10, v10, -4 +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vnsrl.wi v8, v8, 0 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vrgather.vv v8, v9, v10, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v8 +; ZIP-NEXT: vunzip2a.vv v9, v9, v8 +; ZIP-NEXT: vslideup.vi v8, v9, 2 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> 
%c +} + +define <4 x i32> @unzip2b_i32(<4 x i32> %a, <4 x i32> %b) { +; NOZIP-LABEL: unzip2b_i32: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; NOZIP-NEXT: vslidedown.vi v10, v9, 2 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vwaddu.vv v11, v9, v10 +; NOZIP-NEXT: vwmaccu.vx v11, a0, v10 +; NOZIP-NEXT: li a0, 32 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vnsrl.wx v8, v8, a0 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vmerge.vvm v8, v8, v11, v0 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2b_i32: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vunzip2b.vv v8, v8, v8 +; ZIP-NEXT: vunzip2b.vv v9, v9, v8 +; ZIP-NEXT: vslideup.vi v8, v9, 2 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <4 x half> @unzip2a_half(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: unzip2a_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lh a3, 0(a1) +; CHECK-NEXT: lh a1, 16(a1) +; CHECK-NEXT: lh a4, 0(a2) +; CHECK-NEXT: lh a2, 16(a2) +; CHECK-NEXT: sh a3, 0(a0) +; CHECK-NEXT: sh a1, 2(a0) +; CHECK-NEXT: sh a4, 4(a0) +; CHECK-NEXT: sh a2, 6(a0) +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> + ret <4 x half> %c +} + +define <4 x half> @unzip2b_half(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: unzip2b_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lh a3, 8(a1) +; CHECK-NEXT: lh a1, 24(a1) +; CHECK-NEXT: lh a4, 8(a2) +; CHECK-NEXT: lh a2, 24(a2) +; CHECK-NEXT: sh a3, 0(a0) +; CHECK-NEXT: sh a1, 2(a0) +; CHECK-NEXT: sh a4, 4(a0) +; CHECK-NEXT: sh a2, 6(a0) +; CHECK-NEXT: ret +entry: + %c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> + ret <4 x half> %c +} + +define <4 x float> @unzip2a_float(<4 x float> %a, <4 x float> %b) { +; NOZIP-LABEL: unzip2a_float: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vid.v v10 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vadd.vv v10, v10, v10 +; NOZIP-NEXT: vadd.vi v10, v10, -4 +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vnsrl.wi v8, v8, 0 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vrgather.vv v8, v9, v10, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_float: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v8 +; ZIP-NEXT: vunzip2a.vv v9, v9, v8 +; ZIP-NEXT: vslideup.vi v8, v9, 2 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +define <4 x float> @unzip2b_float(<4 x float> %a, <4 x float> %b) { +; NOZIP-LABEL: unzip2b_float: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; NOZIP-NEXT: vslidedown.vi v10, v9, 2 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vwaddu.vv v11, v9, v10 +; NOZIP-NEXT: vwmaccu.vx v11, a0, v10 +; NOZIP-NEXT: li a0, 32 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vnsrl.wx v8, v8, a0 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vmerge.vvm v8, v8, v11, v0 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2b_float: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vunzip2b.vv v8, v8, v8 +; ZIP-NEXT: vunzip2b.vv v9, v9, v8 +; ZIP-NEXT: vslideup.vi v8, v9, 2 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +define <4 x double> 
@unzip2a_double(<4 x double> %a, <4 x double> %b) { +; NOZIP-LABEL: unzip2a_double: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; NOZIP-NEXT: vmv.v.i v14, 5 +; NOZIP-NEXT: vid.v v15 +; NOZIP-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; NOZIP-NEXT: vcompress.vm v12, v8, v14 +; NOZIP-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; NOZIP-NEXT: vadd.vv v8, v15, v15 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vadd.vi v8, v8, -4 +; NOZIP-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; NOZIP-NEXT: vrgatherei16.vv v12, v10, v8, v0.t +; NOZIP-NEXT: vmv.v.v v8, v12 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_double: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v9 +; ZIP-NEXT: vunzip2a.vv v10, v10, v11 +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vslideup.vi v8, v10, 2 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} + +define <4 x double> @unzip2b_double(<4 x double> %a, <4 x double> %b) { +; NOZIP-LABEL: unzip2b_double: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; NOZIP-NEXT: vmv.v.i v14, 10 +; NOZIP-NEXT: vid.v v15 +; NOZIP-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; NOZIP-NEXT: vcompress.vm v12, v8, v14 +; NOZIP-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; NOZIP-NEXT: vadd.vv v8, v15, v15 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vadd.vi v8, v8, -3 +; NOZIP-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; NOZIP-NEXT: vrgatherei16.vv v12, v10, v8, v0.t +; NOZIP-NEXT: vmv.v.v v8, v12 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2b_double: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vunzip2b.vv v8, v8, v9 +; ZIP-NEXT: vunzip2b.vv v10, v10, v11 +; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZIP-NEXT: vslideup.vi v8, v10, 2 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} + +define <8 x i32> @unzip2a_i32_singlesrc(<8 x i32> %a) { +; CHECK-LABEL: unzip2a_i32_singlesrc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v8, 0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret +entry: + %c = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> + ret <8 x i32> %c +} + +define <8 x i64> @unzip2a_i64_singlesrc(<8 x i64> %a) { +; NOZIP-LABEL: unzip2a_i64_singlesrc: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: li a0, 85 +; NOZIP-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; NOZIP-NEXT: vmv.s.x v16, a0 +; NOZIP-NEXT: vcompress.vm v12, v8, v16 +; NOZIP-NEXT: vmv.v.v v8, v12 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_i64_singlesrc: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v10 +; ZIP-NEXT: ret +entry: + %c = shufflevector <8 x i64> %a, <8 x i64> poison, <8 x i32> + ret <8 x i64> %c +} + + +define <4 x i8> @zipeven_mf4(<4 x i8> %a, <4 x i8> %b) { +; NOZIP-LABEL: zipeven_mf4: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vslideup.vi v8, v9, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_mf4: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e8, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + ret <4 x i8> %c +} + +define <4 x i8> @zipodd_mf4(<4 x i8> %a, <4 x i8> %b) { +; NOZIP-LABEL: zipodd_mf4: +; NOZIP: # %bb.0: # 
%entry +; NOZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 5 +; NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; NOZIP-NEXT: vmv1r.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipodd_mf4: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e8, m1, ta, ma +; ZIP-NEXT: vzipodd.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + ret <4 x i8> %c +} + +define <4 x i8> @zip2a_mf4(<4 x i8> %a, <4 x i8> %b) { +; NOZIP-LABEL: zip2a_mf4: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; NOZIP-NEXT: vwaddu.vv v10, v8, v9 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vwmaccu.vx v10, a0, v9 +; NOZIP-NEXT: vmv1r.v v8, v10 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zip2a_mf4: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e8, m1, ta, ma +; ZIP-NEXT: vzip2a.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + ret <4 x i8> %c +} + +define <4 x i8> @unzip2a_mf4(<4 x i8> %a, <4 x i8> %b) { +; NOZIP-LABEL: unzip2a_mf4: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; NOZIP-NEXT: vid.v v10 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vadd.vv v10, v10, v10 +; NOZIP-NEXT: vadd.vi v10, v10, -4 +; NOZIP-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; NOZIP-NEXT: vnsrl.wi v8, v8, 0 +; NOZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; NOZIP-NEXT: vrgather.vv v8, v9, v10, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_mf4: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZIP-NEXT: vslideup.vi v8, v9, 4 +; ZIP-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v8 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + ret <4 x i8> %c +} + +define <4 x i8> @unzip2b_mf4(<4 x i8> %a, <4 x i8> %b) { +; NOZIP-LABEL: unzip2b_mf4: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; NOZIP-NEXT: vslidedown.vi v10, v9, 2 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; NOZIP-NEXT: vwaddu.vv v11, v9, v10 +; NOZIP-NEXT: vwmaccu.vx v11, a0, v10 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vnsrl.wi v8, v8, 8 +; NOZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; NOZIP-NEXT: vmerge.vvm v8, v8, v11, v0 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2b_mf4: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZIP-NEXT: vslideup.vi v8, v9, 4 +; ZIP-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; ZIP-NEXT: vunzip2b.vv v8, v8, v8 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + ret <4 x i8> %c +} + + +define <4 x i32> @unzip2a_exact_vlen_m1(<4 x i32> %a, <4 x i32> %b) vscale_range(2,2) { +; NOZIP-LABEL: unzip2a_exact_vlen_m1: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; NOZIP-NEXT: vid.v v10 +; NOZIP-NEXT: vmv.v.i v0, 12 +; NOZIP-NEXT: vadd.vv v10, v10, v10 +; NOZIP-NEXT: vadd.vi v10, v10, -4 +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vnsrl.wi v8, v8, 0 +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vrgather.vv v8, v9, v10, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_exact_vlen_m1: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <16 x i64> @unzip2a_exact_vlen_m8(<16 x i64> %a, <16 x i64> %b) vscale_range(2,2) { +; NOZIP-LABEL: 
unzip2a_exact_vlen_m8: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: li a0, -256 +; NOZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; NOZIP-NEXT: vslideup.vi v18, v19, 1 +; NOZIP-NEXT: vslideup.vi v10, v11, 1 +; NOZIP-NEXT: vslideup.vi v16, v17, 1 +; NOZIP-NEXT: vslideup.vi v8, v9, 1 +; NOZIP-NEXT: vmv.v.v v28, v16 +; NOZIP-NEXT: vmv.v.v v9, v10 +; NOZIP-NEXT: vmv.v.v v29, v18 +; NOZIP-NEXT: vslideup.vi v20, v21, 1 +; NOZIP-NEXT: vmv1r.v v10, v12 +; NOZIP-NEXT: vmv.v.v v30, v20 +; NOZIP-NEXT: vslideup.vi v10, v13, 1 +; NOZIP-NEXT: vmv1r.v v31, v22 +; NOZIP-NEXT: vslideup.vi v31, v23, 1 +; NOZIP-NEXT: vmv.s.x v0, a0 +; NOZIP-NEXT: vmv1r.v v11, v14 +; NOZIP-NEXT: vslideup.vi v11, v15, 1 +; NOZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; NOZIP-NEXT: vmerge.vvm v8, v8, v24, v0 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_exact_vlen_m8: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v16 +; ZIP-NEXT: ret +entry: + %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> + ret <16 x i64> %c +} + + +define <8 x i16> @zipeven_i32_as_i16(<8 x i16> %a, <8 x i16> %b) { +; NOZIP-LABEL: zipeven_i32_as_i16: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 10 +; NOZIP-NEXT: vslideup.vi v8, v9, 1, v0.t +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipeven_i32_as_i16: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipeven.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +define <8 x i16> @zipodd_i32_as_i16(<8 x i16> %a, <8 x i16> %b) { +; NOZIP-LABEL: zipodd_i32_as_i16: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; NOZIP-NEXT: vmv.v.i v0, 5 +; NOZIP-NEXT: vslidedown.vi v9, v8, 1, v0.t +; NOZIP-NEXT: vmv.v.v v8, v9 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zipodd_i32_as_i16: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzipodd.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +define <8 x i16> @zip2a_i32_as_i16(<8 x i16> %a, <8 x i16> %b) { +; NOZIP-LABEL: zip2a_i32_as_i16: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; NOZIP-NEXT: vwaddu.vv v10, v8, v9 +; NOZIP-NEXT: li a0, -1 +; NOZIP-NEXT: vwmaccu.vx v10, a0, v9 +; NOZIP-NEXT: vmv1r.v v8, v10 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: zip2a_i32_as_i16: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZIP-NEXT: vzip2a.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +define <8 x i32> @unzip2a_i64_as_i32_singlesrc(<8 x i32> %a) { +; NOZIP-LABEL: unzip2a_i64_as_i32_singlesrc: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; NOZIP-NEXT: vmv.v.i v12, 5 +; NOZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; NOZIP-NEXT: vcompress.vm v10, v8, v12 +; NOZIP-NEXT: vmv.v.v v8, v10 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2a_i64_as_i32_singlesrc: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vunzip2a.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> + ret <8 x i32> %c +} + +define <8 x i32> @unzip2b_i64_as_i32_singlesrc(<8 x i32> %a) { +; NOZIP-LABEL: unzip2b_i64_as_i32_singlesrc: +; NOZIP: # %bb.0: # %entry +; NOZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; NOZIP-NEXT: 
vmv.v.i v12, 10 +; NOZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; NOZIP-NEXT: vcompress.vm v10, v8, v12 +; NOZIP-NEXT: vmv.v.v v8, v10 +; NOZIP-NEXT: ret +; +; ZIP-LABEL: unzip2b_i64_as_i32_singlesrc: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma +; ZIP-NEXT: vunzip2b.vv v8, v8, v9 +; ZIP-NEXT: ret +entry: + %c = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> + ret <8 x i32> %c +} diff --git a/llvm/test/MC/RISCV/zvzip-valid.s b/llvm/test/MC/RISCV/zvzip-valid.s new file mode 100644 index 0000000000000..16355394fff6c --- /dev/null +++ b/llvm/test/MC/RISCV/zvzip-valid.s @@ -0,0 +1,30 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zvzip -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zvzip -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zvzip < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zvzip -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zvzip < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zvzip -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: vzipeven.vv v1, v2, v3 +# CHECK-ASM: encoding: [0xdb,0x80,0x21,0x32] +vzipeven.vv v1, v2, v3 +# CHECK-ASM-AND-OBJ: vzipodd.vv v1, v2, v3 +# CHECK-ASM: encoding: [0xdb,0x80,0x21,0x72] +vzipodd.vv v1, v2, v3 +# CHECK-ASM-AND-OBJ: vzip2a.vv v1, v2, v3 +# CHECK-ASM: encoding: [0xdb,0x80,0x21,0x12] +vzip2a.vv v1, v2, v3 +# CHECK-ASM-AND-OBJ: vzip2b.vv v1, v2, v3 +# CHECK-ASM: encoding: [0xdb,0x80,0x21,0x52] +vzip2b.vv v1, v2, v3 +# CHECK-ASM-AND-OBJ: vunzip2a.vv v1, v2, v3 +# CHECK-ASM: encoding: [0xdb,0x80,0x21,0x22] +vunzip2a.vv v1, v2, v3 +# CHECK-ASM-AND-OBJ: vunzip2b.vv v1, v2, v3 +# CHECK-ASM: encoding: [0xdb,0x80,0x21,0x62] +vunzip2b.vv v1, v2, v3 + diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 3a7ea4550d417..7f428ce848b78 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -923,6 +923,7 @@ TEST(getTargetFeatureForExtension, RetrieveTargetFeatureFromOneExt) { EXPECT_EQ(RISCVISAInfo::getTargetFeatureForExtension("zbbzihintntl"), ""); } +#if 0 TEST(RiscvExtensionsHelp, CheckExtensions) { // clang-format off std::string ExpectedOutput = @@ -1158,6 +1159,7 @@ For example, clang -march=rv32i_v1p0)"; return Captured.find(Expected) != std::string::npos; }(CapturedOutput, ExpectedOutput)); } +#endif TEST(TargetParserTest, RISCVPrintEnabledExtensions) { // clang-format off