diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 25a1dccd7b75a..cfd45d6e6aa5c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1058,13 +1058,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(IntegerVPOps, VT, Custom); - // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the - // range of f32. - EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); - if (isTypeLegal(FloatVT)) - setOperationAction( - {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, - Custom); + if (Subtarget.hasStdExtZvbb()) { + setOperationAction({ISD::BITREVERSE, ISD::BSWAP, ISD::CTLZ, + ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ, + ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, + VT, Custom); + } else { + // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the + // range of f32. + EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); + if (isTypeLegal(FloatVT)) + setOperationAction( + {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, + Custom); + } } for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { @@ -4880,6 +4887,11 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SHL) OP_CASE(SRA) OP_CASE(SRL) + OP_CASE(BSWAP) + OP_CASE(CTTZ) + OP_CASE(CTLZ) + OP_CASE(CTPOP) + OP_CASE(BITREVERSE) OP_CASE(SADDSAT) OP_CASE(UADDSAT) OP_CASE(SSUBSAT) @@ -4927,8 +4939,10 @@ static unsigned getRISCVVLOp(SDValue Op) { VP_CASE(CTLZ) // VP_CTLZ VP_CASE(CTTZ) // VP_CTTZ VP_CASE(CTPOP) // VP_CTPOP + case ISD::CTLZ_ZERO_UNDEF: case ISD::VP_CTLZ_ZERO_UNDEF: return RISCVISD::CTLZ_VL; + case ISD::CTTZ_ZERO_UNDEF: case ISD::VP_CTTZ_ZERO_UNDEF: return RISCVISD::CTTZ_VL; case ISD::FMA: @@ -5156,6 +5170,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return LowerIS_FPCLASS(Op, DAG); case ISD::BITREVERSE: { MVT VT = Op.getSimpleValueType(); + if (VT.isFixedLengthVector()) { + 
assert(Subtarget.hasStdExtZvbb() && "Unexpected custom legalization"); + return lowerToScalableOp(Op, DAG); + } SDLoc DL(Op); assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); @@ -5668,6 +5686,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::SREM: case ISD::UDIV: case ISD::UREM: + case ISD::BSWAP: + case ISD::CTPOP: return lowerToScalableOp(Op, DAG); case ISD::SHL: case ISD::SRA: @@ -5702,7 +5722,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerABS(Op, DAG); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: + if (Subtarget.hasStdExtZvbb()) + return lowerToScalableOp(Op, DAG); + assert(Op.getOpcode() != ISD::CTTZ && "Unexpected custom legalization"); return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); case ISD::VSELECT: return lowerFixedLengthVectorSelectToRVV(Op, DAG); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 12a9c84b4e2b8..f1b00e3986400 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX2-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s 
| FileCheck %s --check-prefixes=ZVBB define void @bitreverse_v8i16(ptr %x, ptr %y) { ; RV32-LABEL: bitreverse_v8i16: @@ -66,6 +68,14 @@ define void @bitreverse_v8i16(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vse16.v v8, (a0) ; RV64-NEXT: ret +; +; ZVBB-LABEL: bitreverse_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vbrev.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) @@ -152,6 +162,14 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret +; +; ZVBB-LABEL: bitreverse_v4i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vbrev.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) @@ -291,6 +309,14 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret +; +; ZVBB-LABEL: bitreverse_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vbrev.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) @@ -465,6 +491,14 @@ define void @bitreverse_v16i16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: bitreverse_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vbrev.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> 
@llvm.bitreverse.v16i16(<16 x i16> %a) @@ -683,6 +717,14 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: bitreverse_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vbrev.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -1033,6 +1075,14 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: bitreverse_v4i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vbrev.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index 9fc1d680c821c..e393fef62a251 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @bswap_v8i16(ptr %x, ptr %y) { ; CHECK-LABEL: bswap_v8i16: @@ -14,6 +16,14 @@ define void @bswap_v8i16(ptr %x, ptr %y) { ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret +; +; ZVBB-LABEL: bswap_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vrev8.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a) @@ -58,6 +68,14 @@ define void @bswap_v4i32(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret +; +; ZVBB-LABEL: bswap_v4i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vrev8.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a) @@ -140,6 +158,14 @@ define void @bswap_v2i64(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret +; +; ZVBB-LABEL: bswap_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vrev8.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a) @@ -200,6 +226,14 @@ define void @bswap_v16i16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: bswap_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vrev8.v v8, v8 +; ZVBB-NEXT: 
vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a) @@ -304,6 +338,14 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: bswap_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vrev8.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a) @@ -510,6 +552,14 @@ define void @bswap_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: bswap_v4i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vrev8.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index 69a71c2e45818..f1a87318d25dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -11,6 +11,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8 +; RUN: llc -mtriple=riscv32 
-mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @ctlz_v16i8(ptr %x, ptr %y) nounwind { ; CHECK-LABEL: ctlz_v16i8: @@ -54,6 +56,14 @@ define void @ctlz_v16i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v16i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i8>, ptr %x %b = load <16 x i8>, ptr %y %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) @@ -267,6 +277,14 @@ define void @ctlz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) @@ -423,6 +441,14 @@ define void @ctlz_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v4i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) @@ -621,6 +647,14 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse64.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v2i64: +; 
ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) @@ -719,6 +753,15 @@ define void @ctlz_v32i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v32i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: li a1, 32 +; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false) @@ -928,6 +971,14 @@ define void @ctlz_v16i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false) @@ -1086,6 +1137,14 @@ define void @ctlz_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) @@ -1284,6 +1343,14 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vminu.vx v8, v8, a1 ; LMULMAX8-NEXT: vse64.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_v4i64: +; ZVBB: # %bb.0: +; 
ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) @@ -1332,6 +1399,14 @@ define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v10, a1 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v16i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i8>, ptr %x %b = load <16 x i8>, ptr %y %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) @@ -1534,6 +1609,14 @@ define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) @@ -1679,6 +1762,14 @@ define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v4i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) @@ -1864,6 +1955,14 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse64.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: 
ctlz_zero_undef_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) @@ -1959,6 +2058,15 @@ define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v12, a1 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v32i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: li a1, 32 +; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true) @@ -2165,6 +2273,14 @@ define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true) @@ -2312,6 +2428,14 @@ define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true) @@ -2497,6 +2621,14 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vrsub.vx v8, v8, a1 ; 
LMULMAX8-NEXT: vse64.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: ctlz_zero_undef_v4i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vclz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index c5ed48ffdffe9..60af2188e754f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -3,6 +3,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @ctpop_v16i8(ptr %x, ptr %y) { ; CHECK-LABEL: ctpop_v16i8: @@ -23,6 +25,14 @@ define void @ctpop_v16i8(ptr %x, ptr %y) { ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret +; +; ZVBB-LABEL: ctpop_v16i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load 
<16 x i8>, ptr %x %b = load <16 x i8>, ptr %y %c = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) @@ -135,6 +145,14 @@ define void @ctpop_v8i16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: ctpop_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) @@ -251,6 +269,14 @@ define void @ctpop_v4i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: ctpop_v4i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) @@ -411,6 +437,14 @@ define void @ctpop_v2i64(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: ctpop_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) @@ -471,6 +505,15 @@ define void @ctpop_v32i8(ptr %x, ptr %y) { ; LMULMAX1-NEXT: vse8.v v9, (a0) ; LMULMAX1-NEXT: vse8.v v8, (a1) ; LMULMAX1-NEXT: ret +; +; ZVBB-LABEL: ctpop_v32i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: li a1, 32 +; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = call <32 x 
i8> @llvm.ctpop.v32i8(<32 x i8> %a) @@ -613,6 +656,14 @@ define void @ctpop_v16i16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: ctpop_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a) @@ -759,6 +810,14 @@ define void @ctpop_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: ctpop_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a) @@ -949,6 +1008,14 @@ define void @ctpop_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret +; +; ZVBB-LABEL: ctpop_v4i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vcpop.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 5f48a274df348..de89cb36373fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -11,6 +11,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 ; RUN: llc 
-mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8,LMULMAX8-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; CHECK-LABEL: cttz_v16i8: @@ -54,6 +56,14 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_v16i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i8>, ptr %x %b = load <16 x i8>, ptr %y %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) @@ -262,6 +272,14 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) @@ -419,6 +437,14 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_v4i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, 
ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) @@ -640,6 +666,14 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV64-NEXT: ret +; +; ZVBB-LABEL: cttz_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) @@ -731,6 +765,15 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vmerge.vim v8, v8, 8, v0 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_v32i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: li a1, 32 +; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) @@ -911,6 +954,14 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) @@ -1070,6 +1121,14 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) @@ -1291,6 +1350,14 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV64-NEXT: ret +; +; ZVBB-LABEL: cttz_v4i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) @@ -1339,6 +1406,14 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vsub.vx v8, v10, a1 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v16i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i8>, ptr %x %b = load <16 x i8>, ptr %y %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) @@ -1531,6 +1606,14 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i16>, ptr %x %b = load <8 x i16>, ptr %y %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) @@ -1672,6 +1755,14 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v4i32: +; ZVBB: # %bb.0: +; 
ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i32>, ptr %x %b = load <4 x i32>, ptr %y %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) @@ -1873,6 +1964,14 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-RV64-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV64-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v2i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <2 x i64>, ptr %x %b = load <2 x i64>, ptr %y %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) @@ -1961,6 +2060,15 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vsub.vx v8, v12, a1 ; LMULMAX8-NEXT: vse8.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v32i8: +; ZVBB: # %bb.0: +; ZVBB-NEXT: li a1, 32 +; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; ZVBB-NEXT: vle8.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse8.v v8, (a0) +; ZVBB-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) @@ -2137,6 +2245,14 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse16.v v8, (a0) ; LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v16i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVBB-NEXT: vle16.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse16.v v8, (a0) +; ZVBB-NEXT: ret %a = load <16 x i16>, ptr %x %b = load <16 x i16>, ptr %y %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) @@ -2280,6 +2396,14 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; LMULMAX8-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-NEXT: vse32.v v8, (a0) ; 
LMULMAX8-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v8i32: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZVBB-NEXT: vle32.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse32.v v8, (a0) +; ZVBB-NEXT: ret %a = load <8 x i32>, ptr %x %b = load <8 x i32>, ptr %y %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) @@ -2481,6 +2605,14 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-RV64-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV64-NEXT: ret +; +; ZVBB-LABEL: cttz_zero_undef_v4i64: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; ZVBB-NEXT: vle64.v v8, (a0) +; ZVBB-NEXT: vctz.v v8, v8 +; ZVBB-NEXT: vse64.v v8, (a0) +; ZVBB-NEXT: ret %a = load <4 x i64>, ptr %x %b = load <4 x i64>, ptr %y %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)