diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 37662f79145d6..f79c1fd9278de 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -24001,6 +24001,54 @@ Examples:
 
       %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
 
+.. _int_vp_cttz_elts:
+
+'``llvm.vp.cttz.elts.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic. You can use ``llvm.vp.cttz.elts`` on any
+vector of integer elements, both fixed width and scalable.
+
+::
+
+      declare i32 @llvm.vp.cttz.elts.i32.v16i32 (<16 x i32> <op>, i1 <is_zero_poison>, <16 x i1> <mask>, i32 <vector_length>)
+      declare i64 @llvm.vp.cttz.elts.i64.nxv4i32 (<vscale x 4 x i32> <op>, i1 <is_zero_poison>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare i64 @llvm.vp.cttz.elts.i64.v256i1 (<256 x i1> <op>, i1 <is_zero_poison>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.cttz.elts``' intrinsic counts the number of trailing zero
+elements of a vector. It is the vector-predicated version of
+'``llvm.experimental.cttz.elts``'.
+
+Arguments:
+""""""""""
+
+The first argument is the vector to be counted. This argument must be a vector
+with integer element type. The return type must be an integer type wide enough
+to hold the maximum number of elements of the source vector. The behavior of
+this intrinsic is undefined if the return type is not wide enough for the
+number of elements in the input vector.
+
+The second argument is a constant flag that indicates whether the intrinsic
+returns a valid result if the first argument is all zero.
+
+The third operand is the vector mask and has the same number of elements as the
+input vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.cttz.elts``' intrinsic counts the trailing (least
+significant / lowest-numbered) zero elements in the first operand on each
+enabled lane. If all enabled lanes of the first argument are zero, the result
+is poison if the second argument is true, and the explicit vector length (i.e.
+the fourth operand) otherwise.
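+
+Examples:
+"""""""""
+
+A small illustrative call (the concrete values below are chosen only for
+exposition): counting from lane 0 upward, ``<i32 0, i32 0, i32 3, i32 0>``
+has two zero elements before the first non-zero one.
+
+.. code-block:: llvm
+
+      %r = call i32 @llvm.vp.cttz.elts.i32.v4i32(<4 x i32> <i32 0, i32 0, i32 3, i32 0>, i1 false, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
+      ; %r is 2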
+
 .. _int_vp_sadd_sat:
 
 '``llvm.vp.sadd.sat.*``' Intrinsics
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 661b2841c6ac7..7ed08cfa8a202 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5307,6 +5307,11 @@ class TargetLowering : public TargetLoweringBase {
   /// \returns The expansion result or SDValue() if it fails.
   SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
 
+  /// Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
+  /// \param N Node to expand
+  /// \returns The expansion result or SDValue() if it fails.
+  SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const;
+
   /// Expand ABS nodes. Expands vector/scalar ABS nodes,
   /// vector nodes can only succeed if all operations are legal/custom.
   /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index a2678d69ce406..28116e5316c96 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2255,6 +2255,12 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>]
                                llvm_i1_ty,
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty]>;
+
+  def int_vp_cttz_elts : DefaultAttrsIntrinsic<[ llvm_anyint_ty ],
+                             [ llvm_anyvector_ty,
+                               llvm_i1_ty,
+                               LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+                               llvm_i32_ty]>;
 }
 
 def int_get_active_lane_mask:
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 1c2708a9e8543..f1cc8bcae467b 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -282,6 +282,15 @@ BEGIN_REGISTER_VP_SDNODE(VP_CTTZ_ZERO_UNDEF, -1, vp_cttz_zero_undef, 1, 2)
 END_REGISTER_VP_SDNODE(VP_CTTZ_ZERO_UNDEF)
 END_REGISTER_VP_INTRINSIC(vp_cttz)
 
+// llvm.vp.cttz.elts(x,is_zero_poison,mask,vl)
+BEGIN_REGISTER_VP_INTRINSIC(vp_cttz_elts, 2, 3)
+VP_PROPERTY_NO_FUNCTIONAL
+BEGIN_REGISTER_VP_SDNODE(VP_CTTZ_ELTS, 0, vp_cttz_elts, 1, 2)
+END_REGISTER_VP_SDNODE(VP_CTTZ_ELTS)
+BEGIN_REGISTER_VP_SDNODE(VP_CTTZ_ELTS_ZERO_UNDEF, 0, vp_cttz_elts_zero_undef, 1, 2)
+END_REGISTER_VP_SDNODE(VP_CTTZ_ELTS_ZERO_UNDEF)
+END_REGISTER_VP_INTRINSIC(vp_cttz_elts)
+
 // llvm.vp.fshl(x,y,z,mask,vlen)
 BEGIN_REGISTER_VP(vp_fshl, 3, 4, VP_FSHL, -1)
 VP_PROPERTY_FUNCTIONAL_INTRINSIC(fshl)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 398b5fee990b5..d33366c44e401 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1227,6 +1227,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     Action = TLI.getOperationAction(
         Node->getOpcode(), Node->getOperand(1).getValueType());
     break;
+  case ISD::VP_CTTZ_ELTS:
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    break;
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TLI.getCustomOperationAction(*Node);
@@ -4282,6 +4287,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   case ISD::VECREDUCE_FMINIMUM:
     Results.push_back(TLI.expandVecReduce(Node, DAG));
     break;
+  case ISD::VP_CTTZ_ELTS:
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+    Results.push_back(TLI.expandVPCTTZElements(Node, DAG));
+    break;
   case ISD::GLOBAL_OFFSET_TABLE:
   case ISD::GlobalAddress:
   case ISD::GlobalTLSAddress:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 55f9737bc94dd..0aa36deda79dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -76,6 +76,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::CTTZ:            Res = PromoteIntRes_CTTZ(N); break;
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+  case ISD::VP_CTTZ_ELTS:
+    Res = PromoteIntRes_VP_CttzElements(N);
+    break;
   case ISD::EXTRACT_VECTOR_ELT:
                              Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
   case ISD::LOAD:            Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
@@ -724,6 +728,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
                      N->getOperand(2));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_CttzElements(SDNode *N) {
+  SDLoc DL(N);
+  EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
   SDLoc dl(N);
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4a2c7b355eb52..49be824deb513 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -309,6 +309,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntRes_CTLZ(SDNode *N);
   SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N);
   SDValue PromoteIntRes_CTTZ(SDNode *N);
+  SDValue PromoteIntRes_VP_CttzElements(SDNode *N);
   SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
@@ -912,6 +913,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_FP_ROUND(SDNode *N);
   SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
   SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
+  SDValue SplitVecOp_VP_CttzElements(SDNode *N);
 
   //===--------------------------------------------------------------------===//
   // Vector Widening Support: LegalizeVectorTypes.cpp
@@ -1019,6 +1021,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
   SDValue WidenVecOp_VP_REDUCE(SDNode *N);
   SDValue WidenVecOp_ExpOp(SDNode *N);
+  SDValue WidenVecOp_VP_CttzElements(SDNode *N);
 
   /// Helper function to generate a set of operations to perform
   /// a vector operation for a wider type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 985c9f16ab97c..cab4dc5f3c156 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3098,6 +3098,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::VP_REDUCE_FMIN:
     Res = SplitVecOp_VP_REDUCE(N, OpNo);
     break;
+  case ISD::VP_CTTZ_ELTS:
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+    Res = SplitVecOp_VP_CttzElements(N);
+    break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -4056,6 +4060,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) {
+  SDLoc DL(N);
+  EVT ResVT = N->getValueType(0);
+
+  SDValue Lo, Hi;
+  SDValue VecOp = N->getOperand(0);
+  GetSplitVector(VecOp, Lo, Hi);
+
+  auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
+  auto [EVLLo, EVLHi] =
+      DAG.SplitEVL(N->getOperand(2), VecOp.getValueType(), DL);
+  SDValue VLo = DAG.getZExtOrTrunc(EVLLo, DL, ResVT);
+
+  // if VP_CTTZ_ELTS(Lo) != EVLLo => VP_CTTZ_ELTS(Lo).
+  // else => EVLLo + (VP_CTTZ_ELTS(Hi) or VP_CTTZ_ELTS_ZERO_UNDEF(Hi)).
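+  // The low half always uses the plain VP_CTTZ_ELTS form so that an all-zero
+  // low half reliably yields EVLLo rather than poison; that equality is what
+  // selects the high half's count below. E.g. with EVLLo = 4: a first set
+  // lane at index 2 in the low half gives a result of 2, while an all-zero
+  // low half gives VP_CTTZ_ELTS(Lo) == 4 == EVLLo, so the result becomes
+  // 4 plus the count of the high half.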
+  SDValue ResLo = DAG.getNode(ISD::VP_CTTZ_ELTS, DL, ResVT, Lo, MaskLo, EVLLo);
+  SDValue ResLoNotEVL =
+      DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VLo, ISD::SETNE);
+  SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi, MaskHi, EVLHi);
+  return DAG.getSelect(DL, ResVT, ResLoNotEVL, ResLo,
+                       DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi));
+}
+
 //===----------------------------------------------------------------------===//
 //  Result Vector Widening
 //===----------------------------------------------------------------------===//
@@ -6161,6 +6188,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::VP_REDUCE_FMIN:
     Res = WidenVecOp_VP_REDUCE(N);
     break;
+  case ISD::VP_CTTZ_ELTS:
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+    Res = WidenVecOp_VP_CttzElements(N);
+    break;
   }
 
   // If Res is null, the sub-method took care of registering the result.
@@ -6924,6 +6955,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
                      DAG.getVectorIdxConstant(0, DL));
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_VP_CttzElements(SDNode *N) {
+  SDLoc DL(N);
+  SDValue Source = GetWidenedVector(N->getOperand(0));
+  EVT SrcVT = Source.getValueType();
+  SDValue Mask =
+      GetWidenedMask(N->getOperand(1), SrcVT.getVectorElementCount());
+
+  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0),
+                     {Source, Mask, N->getOperand(2)}, N->getFlags());
+}
+
 //===----------------------------------------------------------------------===//
 // Vector Widening Utilities
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5caf868c83a29..cfd82a342433f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8076,6 +8076,11 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
     ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
     break;
   }
+  case Intrinsic::vp_cttz_elts: {
+    bool IsZeroPoison = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
+    ResOPC = IsZeroPoison ? ISD::VP_CTTZ_ELTS_ZERO_UNDEF : ISD::VP_CTTZ_ELTS;
+    break;
+  }
 #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
   case Intrinsic::VPID:                                                        \
     ResOPC = ISD::VPSD;                                                        \
@@ -8428,7 +8433,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
   case ISD::VP_CTLZ:
   case ISD::VP_CTLZ_ZERO_UNDEF:
   case ISD::VP_CTTZ:
-  case ISD::VP_CTTZ_ZERO_UNDEF: {
+  case ISD::VP_CTTZ_ZERO_UNDEF:
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+  case ISD::VP_CTTZ_ELTS: {
     SDValue Result =
         DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]});
     setValue(&VPIntrin, Result);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cdc1227fd572d..336d89fbcf638 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9074,6 +9074,39 @@ SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
 }
 
+SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
+                                             SelectionDAG &DAG) const {
+  // %cond = to_bool_vec %source
+  // %splat = splat /*val=*/VL
+  // %tz = step_vector
+  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
+  // %r = vp.reduce.umin %v
+  SDLoc DL(N);
+  SDValue Source = N->getOperand(0);
+  SDValue Mask = N->getOperand(1);
+  SDValue EVL = N->getOperand(2);
+  EVT SrcVT = Source.getValueType();
+  EVT ResVT = N->getValueType(0);
+  EVT ResVecVT =
+      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
+
+  // Convert to boolean vector.
+  if (SrcVT.getScalarType() != MVT::i1) {
+    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
+    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                             SrcVT.getVectorElementCount());
+    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
+                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
+  }
+
+  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
+  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
+  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
+  SDValue Select =
+      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
+  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
+}
+
 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                   bool IsNegative) const {
   SDLoc dl(N);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index fe8edcf39681d..454b486b797b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -698,7 +698,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
       ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
       ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
-      ISD::VP_USUBSAT};
+      ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
 
   static const unsigned FloatingPointVPOps[] = {
       ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
@@ -759,6 +759,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
           Expand);
 
+      setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
+                         Custom);
+
       setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
 
       setOperationAction(
@@ -5341,6 +5344,44 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
   return Res;
 }
 
+SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT XLenVT = Subtarget.getXLenVT();
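+  // This lowers onto RISC-V's vfirst.m, which returns the index of the first
+  // set bit of a mask register, or -1 when no enabled bit is set. -1 is an
+  // acceptable answer for the zero_undef form (the result may be anything
+  // when all enabled lanes are zero), but the plain form must return EVL
+  // instead, hence the final select below.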
+  SDValue Source = Op->getOperand(0);
+  MVT SrcVT = Source.getSimpleValueType();
+  SDValue Mask = Op->getOperand(1);
+  SDValue EVL = Op->getOperand(2);
+
+  if (SrcVT.isFixedLengthVector()) {
+    MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
+    Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
+    Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+                                   Subtarget);
+    SrcVT = ContainerVT;
+  }
+
+  // Convert to boolean vector.
+  if (SrcVT.getScalarType() != MVT::i1) {
+    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
+    SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
+    Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
+                         {Source, AllZero, DAG.getCondCode(ISD::SETNE),
+                          DAG.getUNDEF(SrcVT), Mask, EVL});
+  }
+
+  SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
+  if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
+    // In this case, we can interpret poison as -1, so nothing to do further.
+    return Res;
+
+  // Convert -1 to VL.
+  SDValue SetCC =
+      DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
+  Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
+  return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
+}
+
 // While RVV has alignment restrictions, we should always be able to load as a
 // legal equivalently-sized byte-typed vector instead. This method is
 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
@@ -6595,6 +6636,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
     return lowerVPREDUCE(Op, DAG);
+  case ISD::VP_CTTZ_ELTS:
+  case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
+    return lowerVPCttzElements(Op, DAG);
   case ISD::UNDEF: {
     MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
     return convertFromScalableVector(Op.getSimpleValueType(),
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ed14fd4539438..78f99e70c083a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -961,6 +961,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                             unsigned ExtendOpc) const;
   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll
new file mode 100644
index 0000000000000..8b368bfaab08e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr='+v' -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr='+v' -verify-machineinstrs | FileCheck %s --check-prefix=RV64
+
+define iXLen @bool_vec(<vscale x 2 x i1> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: bool_vec:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vfirst.m a1, v9, v0.t
+; RV32-NEXT:    bltz a1, .LBB0_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB0_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bool_vec:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vfirst.m a1, v9, v0.t
+; RV64-NEXT:    bltz a1, .LBB0_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB0_2:
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i1(<vscale x 2 x i1> %src, i1 0, <vscale x 2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define iXLen @bool_vec_zero_poison(<vscale x 2 x i1> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: bool_vec_zero_poison:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vfirst.m a0, v9, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: bool_vec_zero_poison:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vfirst.m a0, v9, v0.t
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i1(<vscale x 2 x i1> %src, i1 1, <vscale x 2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define iXLen @nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: nxv2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a1, v8, v0.t
+; RV32-NEXT:    bltz a1, .LBB2_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB2_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: nxv2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a1, v8, v0.t
+; RV64-NEXT:    bltz a1, .LBB2_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB2_2:
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i32(<vscale x 2 x i32> %src, i1 0, <vscale x 2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define iXLen @nxv2i32_zero_poison(<vscale x 2 x i32> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: nxv2i32_zero_poison:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a0, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: nxv2i32_zero_poison:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a0, v8, v0.t
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i32(<vscale x 2 x i32> %src, i1 1, <vscale x 2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define iXLen @nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a1, v10, v0.t
+; RV32-NEXT:    bltz a1, .LBB4_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB4_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a1, v10, v0.t
+; RV64-NEXT:    bltz a1, .LBB4_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB4_2:
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i64(<vscale x 2 x i64> %src, i1 0, <vscale x 2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define iXLen @nxv2i64_zero_poison(<vscale x 2 x i64> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: nxv2i64_zero_poison:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a0, v10, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: nxv2i64_zero_poison:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vmsne.vi v10, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a0, v10, v0.t
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i64(<vscale x 2 x i64> %src, i1 1, <vscale x 2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define i1 @nxv2i32_cmp_evl(<vscale x 2 x i32> %src, <vscale x 2 x i1> %m, i32 %evl) {
+; RV32-LABEL: nxv2i32_cmp_evl:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a2, v8, v0.t
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltz a2, .LBB6_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB6_2:
+; RV32-NEXT:    xor a0, a1, a0
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: nxv2i32_cmp_evl:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a2, v8, v0.t
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bltz a2, .LBB6_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB6_2:
+; RV64-NEXT:    sext.w a1, a1
+; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    ret
+  %r = call i32 @llvm.vp.cttz.elts.i32.nxv2i32(<vscale x 2 x i32> %src, i1 0, <vscale x 2 x i1> %m, i32 %evl)
+  %cmp = icmp eq i32 %r, %evl
+  ret i1 %cmp
+}
+
+define iXLen @fixed_v2i64(<2 x i64> %src, <2 x i1> %m, i32 %evl) {
+; RV32-LABEL: fixed_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a1, v8, v0.t
+; RV32-NEXT:    bltz a1, .LBB7_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB7_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: fixed_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a1, v8, v0.t
+; RV64-NEXT:    bltz a1, .LBB7_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB7_2:
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.v2i64(<2 x i64> %src, i1 0, <2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+define iXLen @fixed_v2i64_zero_poison(<2 x i64> %src, <2 x i1> %m, i32 %evl) {
+; RV32-LABEL: fixed_v2i64_zero_poison:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV32-NEXT:    vfirst.m a0, v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: fixed_v2i64_zero_poison:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vmsne.vi v8, v8, 0, v0.t
+; RV64-NEXT:    vfirst.m a0, v8, v0.t
+; RV64-NEXT:    ret
+  %r = call iXLen @llvm.vp.cttz.elts.iXLen.v2i64(<2 x i64> %src, i1 1, <2 x i1> %m, i32 %evl)
+  ret iXLen %r
+}
+
+declare iXLen @llvm.vp.cttz.elts.iXLen.nxv2i1(<vscale x 2 x i1>, i1, <vscale x 2 x i1>, i32)
+declare iXLen @llvm.vp.cttz.elts.iXLen.nxv2i32(<vscale x 2 x i32>, i1, <vscale x 2 x i1>, i32)
+declare iXLen @llvm.vp.cttz.elts.iXLen.nxv2i64(<vscale x 2 x i64>, i1, <vscale x 2 x i1>, i32)
+declare iXLen @llvm.vp.cttz.elts.iXLen.v2i64(<2 x i64>, i1, <2 x i1>, i32)