diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index c97a7ae372bc6..6d90c1bf7e4aa 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -21547,6 +21547,42 @@ Examples:
     llvm.experimental.vp.splice(, , -2, 3, 2); ==> trailing elements
 
+.. _int_experimental_vp_reverse:
+
+
+'``llvm.experimental.vp.reverse``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+  declare <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %vec, <2 x i1> %mask, i32 %evl)
+  declare <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.reverse.*``' intrinsic is the vector length
+predicated version of the '``llvm.experimental.vector.reverse.*``' intrinsic.
+
+Arguments:
+""""""""""
+
+The result and the first argument ``vec`` are vectors with the same type.
+The second argument ``mask`` is a vector mask with the same number of
+elements as the result. The third argument ``evl`` is the explicit vector
+length of the operation.
+
+Semantics:
+""""""""""
+
+This intrinsic reverses the order of the first ``evl`` elements of ``vec``.
+Lanes in the result that are disabled by ``mask`` are ``poison``, as are the
+lanes past ``evl``.
+
 .. _int_vp_load:
 
 '``llvm.vp.load``' Intrinsic
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 80c3d8d403d91..8ad08ce8a3082 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2176,6 +2176,13 @@ def int_experimental_vp_splice:
                              llvm_i32_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 
+def int_experimental_vp_reverse:
+  DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                        [LLVMMatchType<0>,
+                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                         llvm_i32_ty],
+                        [IntrNoMem]>;
+
 def int_vp_is_fpclass:
       DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                             [ llvm_anyvector_ty,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55a68ff5768dd..5ab4b98dd805c 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -702,6 +702,12 @@ BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1)
 VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vector_splice)
 END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
 
+// llvm.experimental.vp.reverse(x,mask,vlen)
+BEGIN_REGISTER_VP(experimental_vp_reverse, 1, 2,
+                  EXPERIMENTAL_VP_REVERSE, -1)
+VP_PROPERTY_NO_FUNCTIONAL
+END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
+
 ///// } Shuffles
 
 #undef BEGIN_REGISTER_VP
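For reference, here is a small worked example of the ``llvm.experimental.vp.reverse`` semantics documented in the LangRef hunk above, written in the same notation the LangRef uses for the neighbouring ``vp.splice`` examples; the lane values are purely illustrative and not part of the patch:

  llvm.experimental.vp.reverse(<A, B, C, D>, <true, false, true, true>, 3)
  ==> <C, poison, A, poison>   ; the first three lanes are reversed; lane 1 is disabled by the mask, lane 3 is past evl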
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ee4278ceb729b..d8b2792cc05f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -888,6 +888,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
   void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
   bool SplitVectorOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 88e61ff3a3c6f..894c654020199 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1209,6 +1209,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::UDIVFIXSAT:
     SplitVecRes_FIX(N, Lo, Hi);
     break;
+  case ISD::EXPERIMENTAL_VP_REVERSE:
+    SplitVecRes_VP_REVERSE(N, Lo, Hi);
+    break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2857,6 +2860,56 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
                    DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
 }
 
+void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
+                                              SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  SDValue Val = N->getOperand(0);
+  SDValue Mask = N->getOperand(1);
+  SDValue EVL = N->getOperand(2);
+  SDLoc DL(N);
+
+  // Fallback to VP_STRIDED_STORE to stack followed by VP_LOAD.
+  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+
+  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+                               VT.getVectorElementCount());
+  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
+  EVT PtrVT = StackPtr.getValueType();
+  auto &MF = DAG.getMachineFunction();
+  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+  MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+      Alignment);
+  MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+      Alignment);
+
+  unsigned EltWidth = VT.getScalarSizeInBits() / 8;
+  SDValue NumElemMinus1 =
+      DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT),
+                  DAG.getConstant(1, DL, PtrVT));
+  SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1,
+                                    DAG.getConstant(EltWidth, DL, PtrVT));
+  SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset);
+  SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT);
+
+  SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
+  SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr,
+                                        DAG.getUNDEF(PtrVT), Stride, TrueMask,
+                                        EVL, MemVT, StoreMMO, ISD::UNINDEXED);
+
+  SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);
+
+  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
+  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
+                   DAG.getVectorIdxConstant(0, DL));
+  Hi =
+      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
+                  DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
+
 void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
   SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index beb371063f89b..f32ee22f234aa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -597,7 +597,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
       ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
       ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
-      ISD::VP_ABS};
+      ISD::VP_ABS,
ISD::EXPERIMENTAL_VP_REVERSE}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, @@ -609,7 +609,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, - ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS}; + ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, + ISD::EXPERIMENTAL_VP_REVERSE}; static const unsigned IntegerVecReduceOps[] = { ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, @@ -694,6 +695,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); + setOperationPromotedToType( ISD::VECTOR_SPLICE, VT, MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); @@ -1064,6 +1067,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_TRUNCATE}, VT, Custom); + + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); continue; } @@ -1315,7 +1320,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, - ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}); + ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, + ISD::EXPERIMENTAL_VP_REVERSE}); if (Subtarget.hasVendorXTHeadMemPair()) setTargetDAGCombine({ISD::LOAD, ISD::STORE}); if (Subtarget.useRVVForFixedLengthVectors()) @@ -6406,6 +6412,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, !Subtarget.hasVInstructionsF16())) return SplitVPOp(Op, DAG); return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::EXPERIMENTAL_VP_REVERSE: + return lowerVPReverseExperimental(Op, DAG); } } @@ -10223,6 +10231,127 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, return convertFromScalableVector(VT, Result, DAG, Subtarget); } +SDValue +RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + MVT XLenVT = Subtarget.getXLenVT(); + + SDValue Op1 = Op.getOperand(0); + SDValue Mask = Op.getOperand(1); + SDValue EVL = Op.getOperand(2); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + + MVT GatherVT = ContainerVT; + MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); + // Check if we are working with mask vectors + bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; + if (IsMaskVector) { + GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); + + // Expand input operand + SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, + DAG.getUNDEF(IndicesVT), + DAG.getConstant(1, DL, XLenVT), EVL); + SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, + DAG.getUNDEF(IndicesVT), + DAG.getConstant(0, DL, XLenVT), EVL); + Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne, + SplatZero, EVL); + } + + unsigned EltSize = GatherVT.getScalarSizeInBits(); + unsigned MinSize = 
GatherVT.getSizeInBits().getKnownMinValue(); + unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); + unsigned MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + + unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; + // If this is SEW=8 and VLMAX is unknown or more than 256, we need + // to use vrgatherei16.vv. + // TODO: It's also possible to use vrgatherei16.vv for other types to + // decrease register width for the index calculation. + // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. + if (MaxVLMAX > 256 && EltSize == 8) { + // If this is LMUL=8, we have to split before using vrgatherei16.vv. + // Split the vector in half and reverse each half using a full register + // reverse. + // Swap the halves and concatenate them. + // Slide the concatenated result by (VLMax - VL). + if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT); + auto [Lo, Hi] = DAG.SplitVector(Op1, DL); + + SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); + SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); + + // Reassemble the low and high pieces reversed. + // NOTE: this Result is unmasked (because we do not need masks for + // shuffles). If in the future this has to change, we can use a SELECT_VL + // between Result and UNDEF using the mask originally passed to VP_REVERSE + SDValue Result = + DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev); + + // Slide off any elements from past EVL that were reversed into the low + // elements. + unsigned MinElts = GatherVT.getVectorMinNumElements(); + SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, + DAG.getConstant(MinElts, DL, XLenVT)); + SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); + + Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, + DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL); + + if (IsMaskVector) { + // Truncate Result back to a mask vector + Result = + DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, + {Result, DAG.getConstant(0, DL, GatherVT), + DAG.getCondCode(ISD::SETNE), + DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); + } + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); + } + + // Just promote the int type to i16 which will double the LMUL. 
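+    // vrgatherei16.vv takes 16-bit index elements regardless of the data SEW,
+    // so i16 indices can address every element as long as VLMAX does not
+    // exceed 65536 (see the NOTE above); the index vector built below is then
+    // simply computed in the widened IndicesVT.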
+ IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); + GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; + } + + SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); + SDValue VecLen = + DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); + SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, + DAG.getUNDEF(IndicesVT), VecLen, EVL); + SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID, + DAG.getUNDEF(IndicesVT), Mask, EVL); + SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB, + DAG.getUNDEF(GatherVT), Mask, EVL); + + if (IsMaskVector) { + // Truncate Result back to a mask vector + Result = DAG.getNode( + RISCVISD::SETCC_VL, DL, ContainerVT, + {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE), + DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); + } + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 5ca6376f858c4..8c10c9839e16d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -895,6 +895,7 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll new file mode 100644 index 0000000000000..cc13a97ddce0e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: < %s | FileCheck %s + +define <2 x double> @test_vp_reverse_v2f64_masked(<2 x double> %src, <2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v2f64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %src, <2 x i1> %mask, i32 %evl) + ret <2 x double> %dst +} + +define <2 x double> @test_vp_reverse_v2f64(<2 x double> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 1, i32 0 + %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + + %dst = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %src, 
<2 x i1> %allones, i32 %evl) + ret <2 x double> %dst +} + +define <4 x float> @test_vp_reverse_v4f32_masked(<4 x float> %src, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4f32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> %src, <4 x i1> %mask, i32 %evl) + ret <4 x float> %dst +} + +define <4 x float> @test_vp_reverse_v4f32(<4 x float> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 1, i32 0 + %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + + %dst = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> %src, <4 x i1> %allones, i32 %evl) + ret <4 x float> %dst +} + +declare <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double>,<2 x i1>,i32) +declare <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float>,<4 x i1>,i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll new file mode 100644 index 0000000000000..adc1ca6c85868 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll @@ -0,0 +1,266 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs < %s | FileCheck %s + +define @test_vp_reverse_nxv1f64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv1f64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv1f64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv1f64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv1f64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2f32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2f32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv2f32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2f32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; 
CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv2f32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2f64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2f64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv2f64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2f64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv2f64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4f32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4f32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv4f32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4f32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv4f32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4f64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4f64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv4f64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4f64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv4f64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8f32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: 
test_vp_reverse_nxv8f32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv8f32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8f32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv8f32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8f64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8f64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv8f64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8f64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv8f64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16f32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16f32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv16f32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16f32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv16f32( %src, %allones, i32 %evl) + ret %dst +} + +; LMUL = 1 +declare @llvm.experimental.vp.reverse.nxv1f64(,,i32) +declare @llvm.experimental.vp.reverse.nxv2f32(,,i32) + +; LMUL = 2 +declare @llvm.experimental.vp.reverse.nxv2f64(,,i32) +declare @llvm.experimental.vp.reverse.nxv4f32(,,i32) + +; LMUL = 4 +declare @llvm.experimental.vp.reverse.nxv4f64(,,i32) +declare @llvm.experimental.vp.reverse.nxv8f32(,,i32) + +; LMUL = 8 +declare @llvm.experimental.vp.reverse.nxv8f64(,,i32) +declare @llvm.experimental.vp.reverse.nxv16f32(,,i32) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll new file mode 100644 index 0000000000000..d7fc8838f430d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: < %s | FileCheck %s + +define <2 x i64> @test_vp_reverse_v2i64_masked(<2 x i64> %src, <2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v2i64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> %src, <2 x i1> %mask, i32 %evl) + ret <2 x i64> %dst +} + +define <2 x i64> @test_vp_reverse_v2i64(<2 x i64> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 1, i32 0 + %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + + %dst = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> %src, <2 x i1> %allones, i32 %evl) + ret <2 x i64> %dst +} + +define <4 x i32> @test_vp_reverse_v4i32_masked(<4 x i32> %src, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4i32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> %src, <4 x i1> %mask, i32 %evl) + ret <4 x i32> %dst +} + +define <4 x i32> @test_vp_reverse_v4i32(<4 x i32> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 1, i32 0 + %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + + %dst = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> %src, <4 x i1> %allones, i32 %evl) + ret <4 x i32> %dst +} + +define <8 x i16> @test_vp_reverse_v8i16_masked(<8 x i16> %src, <8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v8i16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> %src, <8 x i1> %mask, i32 %evl) + ret <8 x i16> %dst +} + +define <8 x i16> @test_vp_reverse_v8i16(<8 x i16> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 1, i32 0 + %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + + %dst = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> %src, <8 x i1> %allones, i32 %evl) + ret <8 x i16> %dst +} + +define <16 x i8> @test_vp_reverse_v16i8_masked(<16 x i8> %src, <16 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v16i8_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> %src, <16 x i1> %mask, i32 %evl) + ret <16 x i8> %dst +} + +define <16 x i8> @test_vp_reverse_v16i8(<16 x i8> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v10, v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vrgatherei16.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> undef, i1 1, i32 0 + %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer + + %dst = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> %src, <16 x i1> %allones, i32 %evl) + ret <16 x i8> %dst +} + +declare <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64>,<2 x i1>,i32) +declare <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32>,<4 x i1>,i32) +declare <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16>,<8 x i1>,i32) +declare <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8>,<16 x i1>,i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll new file mode 100644 index 0000000000000..47df1b005a0f8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll @@ -0,0 +1,595 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s + +define @test_vp_reverse_nxv1i64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv1i64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv1i64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv1i64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv1i64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2i32_masked( %src, %mask, i32 zeroext 
%evl) { +; CHECK-LABEL: test_vp_reverse_nxv2i32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv2i32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2i32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv2i32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4i16_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4i16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv4i16( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4i16( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv4i16( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i8_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i8_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv8i8( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i8( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v10, v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vrgatherei16.vv v9, v8, v10 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv8i8( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2i64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2i64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %dst = call 
@llvm.experimental.vp.reverse.nxv2i64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv2i64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv2i64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4i32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4i32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv4i32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4i32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv4i32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i16_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv8i16( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i16( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv8i16( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16i8_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16i8_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv16i8( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16i8( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v12, v12, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: 
vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv16i8( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4i64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4i64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv4i64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv4i64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv4i64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv8i32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv8i32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16i16_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16i16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv16i16( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16i16( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1 +; CHECK-NEXT: vrgather.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv16i16( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv32i8_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv32i8_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, 
e16, m8, ta, ma +; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv32i8( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv32i8( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v16, v16, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv32i8( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i64_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i64_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv8i64( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv8i64( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv8i64( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16i32_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16i32_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv16i32( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv16i32( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv16i32( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv32i16_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv32i16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv32i16( %src, %mask, i32 %evl) + ret %dst +} + +define 
@test_vp_reverse_nxv32i16( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1 +; CHECK-NEXT: vrgather.vv v16, v8, v24 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv32i16( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv64i8_masked( %src, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv64i8_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a2 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 +; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a1, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t +; CHECK-NEXT: ret + %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, %mask, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv64i8( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v16, v16, a2 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v28, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v24, v12, v16 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a1, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v24, a1 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, %allones, i32 %evl) + ret %dst +} + +define @test_vp_reverse_nxv128i8( %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 80 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: addi a3, sp, 64 +; CHECK-NEXT: add a4, a0, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: li a5, -1 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vsse8.v v8, (a4), a5 +; CHECK-NEXT: sub a4, a4, a2 +; CHECK-NEXT: sub a6, a0, a1 +; CHECK-NEXT: sltu a0, a0, a6 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a6 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsse8.v v16, (a4), a5 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vle8.v v8, (a3) +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 
64(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 +; CHECK-NEXT: ret + %head = insertelement undef, i1 1, i32 0 + %allones = shufflevector %head, undef, zeroinitializer + + %dst = call @llvm.experimental.vp.reverse.nxv128i8( %src, %allones, i32 %evl) + ret %dst +} + +; LMUL = 1 +declare @llvm.experimental.vp.reverse.nxv1i64(,,i32) +declare @llvm.experimental.vp.reverse.nxv2i32(,,i32) +declare @llvm.experimental.vp.reverse.nxv4i16(,,i32) +declare @llvm.experimental.vp.reverse.nxv8i8(,,i32) + +; LMUL = 2 +declare @llvm.experimental.vp.reverse.nxv2i64(,,i32) +declare @llvm.experimental.vp.reverse.nxv4i32(,,i32) +declare @llvm.experimental.vp.reverse.nxv8i16(,,i32) +declare @llvm.experimental.vp.reverse.nxv16i8(,,i32) + +; LMUL = 4 +declare @llvm.experimental.vp.reverse.nxv4i64(,,i32) +declare @llvm.experimental.vp.reverse.nxv8i32(,,i32) +declare @llvm.experimental.vp.reverse.nxv16i16(,,i32) +declare @llvm.experimental.vp.reverse.nxv32i8(,,i32) + +; LMUL = 8 +declare @llvm.experimental.vp.reverse.nxv8i64(,,i32) +declare @llvm.experimental.vp.reverse.nxv16i32(,,i32) +declare @llvm.experimental.vp.reverse.nxv32i16(,,i32) +declare @llvm.experimental.vp.reverse.nxv64i8(,,i32) + +declare @llvm.experimental.vp.reverse.nxv128i8(,,i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll new file mode 100644 index 0000000000000..fd608c858650e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: < %s | FileCheck %s + +define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v2i1_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: ret + %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> %mask, i32 %evl) + ret <2 x i1> %dst +} + +define <2 x i1> @test_vp_reverse_v2i1(<2 x i1> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vrsub.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> undef, i1 1, i32 0 + %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + + %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> %allones, i32 %evl) + ret <2 x i1> %dst +} + +define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4i1_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: 
vsetvli zero, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: ret + %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> %mask, i32 %evl) + ret <4 x i1> %dst +} + +define <4 x i1> @test_vp_reverse_v4i1(<4 x i1> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vrsub.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> undef, i1 1, i32 0 + %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer + + %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> %allones, i32 %evl) + ret <4 x i1> %dst +} + +define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v8i1_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: ret + %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> %mask, i32 %evl) + ret <8 x i1> %dst +} + +define <8 x i1> @test_vp_reverse_v8i1(<8 x i1> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vrsub.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> undef, i1 1, i32 0 + %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer + + %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> %allones, i32 %evl) + ret <8 x i1> %dst +} + +define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v16i1_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t +; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t +; CHECK-NEXT: ret + %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> %mask, i32 %evl) + ret <16 x i1> %dst +} + +define <16 x i1> @test_vp_reverse_v16i1(<16 x i1> %src, i32 zeroext %evl) { +; 
+; CHECK-LABEL: test_vp_reverse_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v11, v10, v8
+; CHECK-NEXT: vmsne.vi v0, v11, 0
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+
+ %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> %allones, i32 %evl)
+ ret <16 x i1> %dst
+}
+
+declare <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1>,<2 x i1>,i32)
+declare <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1>,<4 x i1>,i32)
+declare <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1>,<8 x i1>,i32)
+declare <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1>,<16 x i1>,i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
new file mode 100644
index 0000000000000..29917141fffed
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
@@ -0,0 +1,301 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i1> @test_vp_reverse_nxv1i1_masked(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i1> %dst
+}
+
+define <vscale x 1 x i1> @test_vp_reverse_nxv1i1(<vscale x 1 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+
+ %dst = call <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1> %src, <vscale x 1 x i1> %allones, i32 %evl)
+ ret <vscale x 1 x i1> %dst
+}
+
+define <vscale x 2 x i1> @test_vp_reverse_nxv2i1_masked(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x i1> %dst
+}
+
+define <vscale x 2 x i1> @test_vp_reverse_nxv2i1(<vscale x 2 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+ %dst = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %src, <vscale x 2 x i1> %allones, i32 %evl)
+ ret <vscale x 2 x i1> %dst
+}
+
+define <vscale x 4 x i1> @test_vp_reverse_nxv4i1_masked(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x i1> %dst
+}
+
+define <vscale x 4 x i1> @test_vp_reverse_nxv4i1(<vscale x 4 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x i1> %dst
+}
+
+define <vscale x 8 x i1> @test_vp_reverse_nxv8i1_masked(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i1> %dst
+}
+
+define <vscale x 8 x i1> @test_vp_reverse_nxv8i1(<vscale x 8 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v11, v10, v8
+; CHECK-NEXT: vmsne.vi v0, v11, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x i1> %dst
+}
+
+define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v10, v12, v0.t
+; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x i1> %dst
+}
+
+define <vscale x 16 x i1> @test_vp_reverse_nxv16i1(<vscale x 16 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v14, v12, v8
+; CHECK-NEXT: vmsne.vi v0, v14, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+
+ %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %allones, i32 %evl)
+ ret <vscale x 16 x i1> %dst
+}
+
+define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v24, v12, v16, v0.t
+; CHECK-NEXT: vmsne.vi v8, v24, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ ret <vscale x 32 x i1> %dst
+}
+
+define <vscale x 32 x i1> @test_vp_reverse_nxv32i1(<vscale x 32 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v20, v16, v8
+; CHECK-NEXT: vmsne.vi v0, v20, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+
+ %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %allones, i32 %evl)
+ ret <vscale x 32 x i1> %dst
+}
+
+define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv64i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v0, v16, a2
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v20, v24, v0
+; CHECK-NEXT: vrgatherei16.vv v16, v28, v0
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vslidedown.vx v16, v16, a1, v0.t
+; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl)
+ ret <vscale x 64 x i1> %dst
+}
+
+define <vscale x 64 x i1> @test_vp_reverse_nxv64i1(<vscale x 64 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a2
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v20, v8, v24
+; CHECK-NEXT: vrgatherei16.vv v16, v12, v24
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v16, a1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+
+ %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %allones, i32 %evl)
+ ret <vscale x 64 x i1> %dst
+}
+
+declare <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1>,<vscale x 1 x i1>,i32)
+declare <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1>,<vscale x 2 x i1>,i32)
+declare <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1>,<vscale x 4 x i1>,i32)
+declare <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1>,<vscale x 8 x i1>,i32)
+declare <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1>,<vscale x 16 x i1>,i32)
+declare <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1>,<vscale x 32 x i1>,i32)
+declare <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1>,<vscale x 64 x i1>,i32)
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index 9590d9b068ecf..a3bef3d42adb0 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -148,6 +148,8 @@ class VPIntrinsicTest : public testing::Test {
 Str << " declare <8 x i1> @llvm.vp.icmp.v8i16"
 << "(<8 x i16>, <8 x i16>, metadata, <8 x i1>, i32) ";
+ Str << " declare <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32>, "
+ "<8 x i1>, i32) ";
 Str << " declare <8 x i16> @llvm.vp.abs.v8i16"
 << "(<8 x i16>, i1 immarg, <8 x i1>, i32) ";
 Str << " declare <8 x i16> @llvm.vp.bitreverse.v8i16"