diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f3cce950ed7b5..8aa684c56bde0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3573,6 +3573,13 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { /*Decrement=*/true); } +bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) { + return selectVSplatImmHelper( + N, SplatVal, *CurDAG, *Subtarget, + [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }, + /*Decrement=*/false); +} + bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal) { return selectVSplatImmHelper( diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 5048a80fdd18f..db09ad146b655 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -137,6 +137,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { return selectVSplatUimm(N, Bits, Val); } bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal); + bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal); bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal); // Matches the splat of a value which can be extended or truncated, such that // only the bottom 8 bits are preserved. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 2bd61883760e5..b2c5261ae6c2d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -877,9 +877,9 @@ foreach mti = AllMasks in // 11.1. Vector Single-Width Integer Add and Subtract defm : VPatBinarySDNode_VV_VX_VI<add, "PseudoVADD">; defm : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">; -// Handle VRSUB specially since it's the only integer binary op with reversed -// pattern operands foreach vti = AllIntegerVectors in { + // Handle VRSUB specially since it's the only integer binary op with reversed + // pattern operands // FIXME: The AddedComplexity here is covering up a missing matcher for // widening vwsub.vx which can recognize an extend folded into the // scalar of the splat. @@ -896,6 +896,15 @@ foreach vti = AllIntegerVectors in { (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; } + + let Predicates = GetVTypePredicates<vti>.Predicates in { + // Match VSUB with a small immediate to vadd.vi by negating the immediate. + def : Pat<(sub (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat_simm5_plus1_nodec simm5_plus1:$rs2))), + (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, + (NegImm simm5_plus1:$rs2), vti.AVL, vti.Log2SEW, TA_MA)>; + } } // 11.2. Vector Widening Integer Add and Subtract diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 43cfc9d1e77ca..5d98ffedcbb9a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -598,6 +598,8 @@ def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimmBits<5>", [], [], 3>; def SplatPat_uimm6 : ComplexPattern<vAny, 1, "selectVSplatUimmBits<6>", [], [], 3>; def SplatPat_simm5_plus1 : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 3>; +def SplatPat_simm5_plus1_nodec + : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NoDec", [], [], 3>; def SplatPat_simm5_plus1_nonzero : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>; @@ -1992,10 +1994,10 @@ multiclass VPatAVGADDVL_VV_VX_RM { // 11.1.
Vector Single-Width Integer Add and Subtract defm : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">; defm : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">; -// Handle VRSUB specially since it's the only integer binary op with reversed -// pattern operands foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates<vti>.Predicates in { + // Handle VRSUB specially since it's the only integer binary op with + // reversed pattern operands def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), (vti.Vector vti.RegClass:$rs1), vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), @@ -2008,6 +2010,15 @@ foreach vti = AllIntegerVectors in { (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, simm5:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + // Match VSUB with a small immediate to vadd.vi by negating the immediate. + def : Pat<(riscv_sub_vl (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat_simm5_plus1_nodec simm5_plus1:$rs2)), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), + (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX#"_MASK") + vti.RegClass:$passthru, vti.RegClass:$rs1, + (NegImm simm5_plus1:$rs2), (vti.Mask VMV0:$vm), + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 6f515996677ee..ceca813782461 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2585,8 +2585,7 @@ define @vp_ctlz_nxv1i9( %va, @vp_ctlz_nxv1i9( %va, @llvm.vp.ctlz.nxv1i9( %va, i1 false, %m, i32 %evl) ret %v @@ -2744,13 +2742,12 @@ define @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor( @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor( @llvm.vp.xor.nxv1i9( %va, splat (i9 -1), %m, i32 %evl) %v = call @llvm.ctlz( %va.not, i1 false) diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index 5761ae0926eae..bd7a20f9ef590 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -11,12 +11,11 @@ define @cttz_nxv1i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv1i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -77,12 +76,11 @@ declare @llvm.cttz.nxv1i8(, i1) define @cttz_nxv2i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv2i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -143,12 +141,11 @@ declare @llvm.cttz.nxv2i8(, i1) define @cttz_nxv4i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv4i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2,
ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -209,12 +206,11 @@ declare @llvm.cttz.nxv4i8(, i1) define @cttz_nxv8i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv8i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -275,12 +271,11 @@ declare @llvm.cttz.nxv8i8(, i1) define @cttz_nxv16i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv16i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -341,12 +336,11 @@ declare @llvm.cttz.nxv16i8(, i1) define @cttz_nxv32i8( %va) { ; CHECK-LABEL: cttz_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -373,12 +367,11 @@ declare @llvm.cttz.nxv32i8(, i1) define @cttz_nxv64i8( %va) { ; CHECK-LABEL: cttz_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: li a0, 51 @@ -405,13 +398,12 @@ declare @llvm.cttz.nxv64i8(, i1) define @cttz_nxv1i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv1i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: 
vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -472,13 +464,12 @@ declare @llvm.cttz.nxv1i16(, i1) define @cttz_nxv2i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv2i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -539,13 +530,12 @@ declare @llvm.cttz.nxv2i16(, i1) define @cttz_nxv4i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv4i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -606,13 +596,12 @@ declare @llvm.cttz.nxv4i16(, i1) define @cttz_nxv8i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv8i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -673,13 +662,12 @@ declare @llvm.cttz.nxv8i16(, i1) define @cttz_nxv16i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv16i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -740,13 +728,12 @@ declare @llvm.cttz.nxv16i16(, i1) define @cttz_nxv32i16( %va) { ; CHECK-LABEL: cttz_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 3 @@ -779,13 +766,12 @@ declare @llvm.cttz.nxv32i16(, i1) define 
@cttz_nxv1i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv1i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -850,13 +836,12 @@ declare @llvm.cttz.nxv1i32(, i1) define @cttz_nxv2i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv2i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -921,13 +906,12 @@ declare @llvm.cttz.nxv2i32(, i1) define @cttz_nxv4i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv4i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -992,13 +976,12 @@ declare @llvm.cttz.nxv4i32(, i1) define @cttz_nxv8i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv8i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -1063,13 +1046,12 @@ declare @llvm.cttz.nxv8i32(, i1) define @cttz_nxv16i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv16i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v16, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v16, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v16, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -1135,18 
+1117,17 @@ declare @llvm.cttz.nxv16i32(, i1) define @cttz_nxv1i64( %va) { ; RV32I-LABEL: cttz_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vnot.v v9, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v9, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v10, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vand.vv v8, v9, v8 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vand.vv v9, v9, v10 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma @@ -1177,17 +1158,14 @@ define @cttz_nxv1i64( %va) { ; ; RV64I-LABEL: cttz_nxv1i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1196,6 +1174,8 @@ define @cttz_nxv1i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m1, ta, ma +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -1261,18 +1241,17 @@ declare @llvm.cttz.nxv1i64(, i1) define @cttz_nxv2i64( %va) { ; RV32I-LABEL: cttz_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vnot.v v10, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v12, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vand.vv v8, v10, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vand.vv v10, v10, v12 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma @@ -1303,17 +1282,14 @@ define @cttz_nxv2i64( %va) { ; ; RV64I-LABEL: cttz_nxv2i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: 
add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1322,6 +1298,8 @@ define @cttz_nxv2i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m2, ta, ma +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 @@ -1387,18 +1365,17 @@ declare @llvm.cttz.nxv2i64(, i1) define @cttz_nxv4i64( %va) { ; RV32I-LABEL: cttz_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vnot.v v12, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32I-NEXT: vmv.v.x v16, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vand.vv v8, v12, v8 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vand.vv v12, v12, v16 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma @@ -1429,17 +1406,14 @@ define @cttz_nxv4i64( %va) { ; ; RV64I-LABEL: cttz_nxv4i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1448,6 +1422,8 @@ define @cttz_nxv4i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m4, ta, ma +; RV64I-NEXT: vadd.vi v12, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v12 ; RV64I-NEXT: vsrl.vi v12, v8, 1 @@ -1513,18 +1489,17 @@ declare @llvm.cttz.nxv4i64(, i1) define @cttz_nxv8i64( %va) { ; RV32I-LABEL: cttz_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vnot.v v16, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vadd.vi v16, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32I-NEXT: vmv.v.x v24, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vand.vv v8, v16, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vand.vv v24, v16, v24 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma @@ -1555,17 +1530,14 @@ define @cttz_nxv8i64( %va) { ; ; RV64I-LABEL: cttz_nxv8i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV64I-NEXT: vsub.vx v16, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; 
RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1574,6 +1546,8 @@ define @cttz_nxv8i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m8, ta, ma +; RV64I-NEXT: vadd.vi v16, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v16 ; RV64I-NEXT: vsrl.vi v16, v8, 1 @@ -1639,12 +1613,11 @@ declare @llvm.cttz.nxv8i64(, i1) define @cttz_zero_undef_nxv1i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1700,12 +1673,11 @@ define @cttz_zero_undef_nxv1i8( %va) { define @cttz_zero_undef_nxv2i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1761,12 +1733,11 @@ define @cttz_zero_undef_nxv2i8( %va) { define @cttz_zero_undef_nxv4i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1822,12 +1793,11 @@ define @cttz_zero_undef_nxv4i8( %va) { define @cttz_zero_undef_nxv8i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1883,12 +1853,11 @@ define 
@cttz_zero_undef_nxv8i8( %va) { define @cttz_zero_undef_nxv16i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1944,12 +1913,11 @@ define @cttz_zero_undef_nxv16i8( %va) { define @cttz_zero_undef_nxv32i8( %va) { ; CHECK-LABEL: cttz_zero_undef_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -1975,12 +1943,11 @@ define @cttz_zero_undef_nxv32i8( %va) { define @cttz_zero_undef_nxv64i8( %va) { ; CHECK-LABEL: cttz_zero_undef_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: li a0, 51 @@ -2006,13 +1973,12 @@ define @cttz_zero_undef_nxv64i8( %va) { define @cttz_zero_undef_nxv1i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2066,13 +2032,12 @@ define @cttz_zero_undef_nxv1i16( %va) { define @cttz_zero_undef_nxv2i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2126,13 +2091,12 @@ define @cttz_zero_undef_nxv2i16( %va) { define @cttz_zero_undef_nxv4i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; 
CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2186,13 +2150,12 @@ define @cttz_zero_undef_nxv4i16( %va) { define @cttz_zero_undef_nxv8i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2246,13 +2209,12 @@ define @cttz_zero_undef_nxv8i16( %va) { define @cttz_zero_undef_nxv16i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2306,13 +2268,12 @@ define @cttz_zero_undef_nxv16i16( %va) { define @cttz_zero_undef_nxv32i16( %va) { ; CHECK-LABEL: cttz_zero_undef_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 3 @@ -2344,13 +2305,12 @@ define @cttz_zero_undef_nxv32i16( %va) { define @cttz_zero_undef_nxv1i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2408,13 +2368,12 @@ define @cttz_zero_undef_nxv1i32( %va) { define @cttz_zero_undef_nxv2i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i32: ; 
CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2472,13 +2431,12 @@ define @cttz_zero_undef_nxv2i32( %va) { define @cttz_zero_undef_nxv4i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2536,13 +2494,12 @@ define @cttz_zero_undef_nxv4i32( %va) { define @cttz_zero_undef_nxv8i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2600,13 +2557,12 @@ define @cttz_zero_undef_nxv8i32( %va) { define @cttz_zero_undef_nxv16i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v16, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v16, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v16, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2665,18 +2621,17 @@ define @cttz_zero_undef_nxv16i32( %va) { define @cttz_zero_undef_nxv1i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vnot.v v9, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v9, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v10, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vand.vv v8, v9, v8 +; 
RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vand.vv v9, v9, v10 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma @@ -2707,17 +2662,14 @@ define @cttz_zero_undef_nxv1i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv1i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -2726,6 +2678,8 @@ define @cttz_zero_undef_nxv1i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m1, ta, ma +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -2783,18 +2737,17 @@ define @cttz_zero_undef_nxv1i64( %va) { define @cttz_zero_undef_nxv2i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vnot.v v10, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v12, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vand.vv v8, v10, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vand.vv v10, v10, v12 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma @@ -2825,17 +2778,14 @@ define @cttz_zero_undef_nxv2i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv2i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -2844,6 +2794,8 @@ define @cttz_zero_undef_nxv2i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m2, ta, ma +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 @@ -2901,18 +2853,17 @@ define @cttz_zero_undef_nxv2i64( %va) { define @cttz_zero_undef_nxv4i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; 
RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vnot.v v12, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32I-NEXT: vmv.v.x v16, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vand.vv v8, v12, v8 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vand.vv v12, v12, v16 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma @@ -2943,17 +2894,14 @@ define @cttz_zero_undef_nxv4i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv4i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -2962,6 +2910,8 @@ define @cttz_zero_undef_nxv4i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m4, ta, ma +; RV64I-NEXT: vadd.vi v12, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v12 ; RV64I-NEXT: vsrl.vi v12, v8, 1 @@ -3019,18 +2969,17 @@ define @cttz_zero_undef_nxv4i64( %va) { define @cttz_zero_undef_nxv8i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vnot.v v16, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vadd.vi v16, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32I-NEXT: vmv.v.x v24, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vand.vv v8, v16, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vand.vv v24, v16, v24 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma @@ -3061,17 +3010,14 @@ define @cttz_zero_undef_nxv8i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv8i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV64I-NEXT: vsub.vx v16, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -3080,6 
+3026,8 @@ define @cttz_zero_undef_nxv8i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m8, ta, ma +; RV64I-NEXT: vadd.vi v16, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v16 ; RV64I-NEXT: vsrl.vi v16, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 766717d92a749..38ef54ff4fe41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -13,9 +13,8 @@ declare @llvm.vp.cttz.nxv1i8(, i1 immarg, @vp_cttz_nxv1i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -44,12 +43,11 @@ define @vp_cttz_nxv1i8( %va, @vp_cttz_nxv1i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -77,9 +75,8 @@ declare @llvm.vp.cttz.nxv2i8(, i1 immarg, @vp_cttz_nxv2i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -108,12 +105,11 @@ define @vp_cttz_nxv2i8( %va, @vp_cttz_nxv2i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -141,9 +137,8 @@ declare @llvm.vp.cttz.nxv4i8(, i1 immarg, @vp_cttz_nxv4i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -172,12 +167,11 @@ define @vp_cttz_nxv4i8( %va, @vp_cttz_nxv4i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -205,9 +199,8 @@ declare @llvm.vp.cttz.nxv8i8(, i1 immarg, @vp_cttz_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; 
CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -236,12 +229,11 @@ define @vp_cttz_nxv8i8( %va, @vp_cttz_nxv8i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -269,9 +261,8 @@ declare @llvm.vp.cttz.nxv16i8(, i1 immarg, define @vp_cttz_nxv16i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t @@ -300,12 +291,11 @@ define @vp_cttz_nxv16i8( %va, @vp_cttz_nxv16i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v10, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: li a0, 51 @@ -333,9 +323,8 @@ declare @llvm.vp.cttz.nxv32i8(, i1 immarg, define @vp_cttz_nxv32i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v12, v0.t @@ -364,12 +353,11 @@ define @vp_cttz_nxv32i8( %va, @vp_cttz_nxv32i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv32i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -397,9 +385,8 @@ declare @llvm.vp.cttz.nxv64i8(, i1 immarg, define @vp_cttz_nxv64i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t @@ -428,12 +415,11 @@ define @vp_cttz_nxv64i8( %va, @vp_cttz_nxv64i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv 
v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: li a0, 51
@@ -461,9 +447,8 @@ declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg,
define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -499,13 +484,12 @@ define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va,
define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -539,9 +523,8 @@ declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg,
define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -577,13 +560,12 @@ define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va,
define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv2i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -617,9 +599,8 @@ declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg,
define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -655,13 +636,12 @@ define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va,
define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv4i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -695,9 +675,8 @@ declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg,
define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -733,13 +712,12 @@ define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va,
define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv8i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vnot.v v10, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v10, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 3
@@ -773,9 +751,8 @@ declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immar
define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -811,13 +788,12 @@ define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va,
define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vnot.v v12, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v12, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: lui a0, 3
@@ -851,9 +827,8 @@ declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immar
define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -889,13 +864,12 @@ define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va,
define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv32i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vnot.v v16, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v16, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: lui a0, 3
@@ -929,9 +903,8 @@ declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg,
define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -968,13 +941,12 @@ define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va,
define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv1i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 209715
@@ -1009,9 +981,8 @@ declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg,
define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -1048,13 +1019,12 @@ define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va,
define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv2i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 209715
@@ -1089,9 +1059,8 @@ declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg,
define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -1128,13 +1097,12 @@ define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va,
define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv4i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vnot.v v10, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v10, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 209715
@@ -1169,9 +1137,8 @@ declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg,
define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -1208,13 +1175,12 @@ define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va,
define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv8i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnot.v v12, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v12, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: lui a0, 209715
@@ -1249,9 +1215,8 @@ declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immar
define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -1288,13 +1253,12 @@ define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va,
define <vscale x 16 x i32> @vp_cttz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_nxv16i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vnot.v v16, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v16, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: lui a0, 209715
@@ -1329,9 +1293,8 @@ declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg,
define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v9, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
@@ -1371,39 +1334,38 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, @vp_cttz_nxv1i64( %va,
define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv1i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vnot.v v9, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v9, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v9, v8
+; RV32-NEXT: vand.vv v8, v8, v9
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
@@ -1461,38 +1422,37 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
;
; RV64-LABEL: vp_cttz_nxv1i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v9, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vand.vx v9, v9, a0
+; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vand.vx v9, v8, a1
+; RV64-NEXT: vand.vx v9, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
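Every CHECK ladder in these hunks verifies the same standard cttz expansion, cttz(x) = popcount(~x & (x - 1)); the patch only changes how x - 1 is formed (vadd.vi with -1 instead of li 1 plus vsub.vx). A minimal scalar model of the 32-bit ladder, with hypothetical helper names, is useful for cross-checking the magic constants the lui/addi pairs materialize (349525<<12+1365 = 0x55555555, 209715<<12+819 = 0x33333333, 61681<<12-241 = 0x0F0F0F0F, 4112<<12+257 = 0x01010101):

#include <cassert>
#include <cstdint>

// Scalar sketch of the vector sequence checked above (illustrative only):
// cttz(x) = popcount(~x & (x - 1)).
static unsigned cttzModel(uint32_t x) {
  uint32_t m = ~x & (x - 1);                        // vadd.vi -1 / vnot.v / vand.vv
  m = m - ((m >> 1) & 0x55555555u);                 // vsrl.vi 1 / vand.vx / vsub.vv
  m = (m & 0x33333333u) + ((m >> 2) & 0x33333333u); // vand.vx / vsrl.vi 2 / vadd.vv
  m = (m + (m >> 4)) & 0x0F0F0F0Fu;                 // vsrl.vi 4 / vadd.vv / vand.vx
  return (m * 0x01010101u) >> 24;                   // vmul.vx / final shift
}

int main() {
  assert(cttzModel(8) == 3);
  assert(cttzModel(1) == 0);
  assert(cttzModel(0) == 32); // all-ones mask, popcount = 32
}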
@@ -1511,9 +1471,8 @@ declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg,
define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v10, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
@@ -1553,39 +1512,38 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, @vp_cttz_nxv2i64( %va,
define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vnot.v v10, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v10, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v10, v8
+; RV32-NEXT: vand.vv v8, v8, v10
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
@@ -1643,38 +1600,37 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
;
; RV64-LABEL: vp_cttz_nxv2i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v10, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v10
; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vand.vx v10, v10, a0
+; RV64-NEXT: vand.vx v10, v10, a1
; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: vand.vx v10, v8, a1
+; RV64-NEXT: vand.vx v10, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
@@ -1693,9 +1649,8 @@ declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg,
define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v12, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
@@ -1735,39 +1690,38 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, @vp_cttz_nxv4i64( %va,
define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vnot.v v12, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v12, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
@@ -1825,38 +1778,37 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
;
; RV64-LABEL: vp_cttz_nxv4i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v12, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v12
; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vand.vx v12, v12, a0
+; RV64-NEXT: vand.vx v12, v12, a1
; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: vand.vx v12, v8, a1
+; RV64-NEXT: vand.vx v12, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
@@ -1875,9 +1827,8 @@ declare <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64>, i1 immarg,
define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv7i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v16, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
@@ -1917,39 +1868,38 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, @vp_cttz_nxv7i64( %va,
define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv7i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vnot.v v16, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v16, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v16, v8
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
@@ -2007,38 +1956,37 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
;
; RV64-LABEL: vp_cttz_nxv7i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v16, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vand.vx v16, v16, a1
; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: vand.vx v16, v8, a1
+; RV64-NEXT: vand.vx v16, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
@@ -2057,9 +2005,8 @@ declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg,
define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v16, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
@@ -2099,39 +2046,38 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, @vp_cttz_nxv8i64( %va,
define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_nxv8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vnot.v v16, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v16, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v8, v16, v8
+; RV32-NEXT: vand.vv v8, v8, v16
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma
@@ -2189,38 +2134,37 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
;
; RV64-LABEL: vp_cttz_nxv8i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v16, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v16, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 1
-; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vand.vx v16, v16, a1
; RV64-NEXT: vsub.vv v8, v8, v16
-; RV64-NEXT: vand.vx v16, v8, a1
+; RV64-NEXT: vand.vx v16, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
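The nxv16i64 cases below operate on a value split across two m8 register groups, so the AVL in a0 has to be divided between them; the sub/sltu/addi/and prologue surviving in those hunks computes the second group's AVL branchlessly. A small model of that idiom (the helper name is illustrative, not from the patch):

#include <cassert>
#include <cstdint>

// Models "sub a4, a0, a1; sltu a2, a0, a4; addi a2, a2, -1; and a2, a2, a4":
// the high register group's AVL is evl - vlmax when that does not underflow,
// and 0 otherwise.
static uint64_t highAVL(uint64_t evl, uint64_t vlmax) {
  uint64_t rem = evl - vlmax;   // sub (may wrap below zero)
  uint64_t wrapped = evl < rem; // sltu: 1 iff the subtraction wrapped
  return (wrapped - 1) & rem;   // addi -1 turns 1 into 0 and 0 into all-ones
}

int main() {
  assert(highAVL(10, 4) == 6); // second group handles the leftover 6 elements
  assert(highAVL(3, 4) == 0);  // everything fits in the first group
}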
@@ -2254,138 +2198,131 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64_unmasked( %va, i
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 1
-; RV32-NEXT: lui a3, 349525
-; RV32-NEXT: lui a4, 209715
-; RV32-NEXT: sub a5, a0, a1
-; RV32-NEXT: addi a3, a3, 1365
-; RV32-NEXT: addi a4, a4, 819
-; RV32-NEXT: vsetvli a6, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v0, a3
-; RV32-NEXT: sltu a3, a0, a5
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a3, a3, a5
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v16, a2
+; RV32-NEXT: lui a2, 349525
+; RV32-NEXT: lui a3, 209715
+; RV32-NEXT: sub a4, a0, a1
+; RV32-NEXT: addi a2, a2, 1365
+; RV32-NEXT: addi a3, a3, 819
+; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v0, a2
+; RV32-NEXT: sltu a2, a0, a4
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vadd.vi v24, v16, -1
; RV32-NEXT: vnot.v v16, v16
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsrl.vi v24, v16, 1
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 24
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v0, (a5) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v24, v0
-; RV32-NEXT: vsub.vv v16, v16, v24
-; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v0, a4
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT: vand.vv v24, v16, v0
-; RV32-NEXT: vsrl.vi v16, v16, 2
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: li a5, 24
+; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v24, v0
+; RV32-NEXT: vsub.vv v16, v16, v24
+; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v0, a3
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vand.vv v24, v16, v0
+; RV32-NEXT: vsrl.vi v16, v16, 2
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vadd.vv v16, v24, v16
; RV32-NEXT: vsrl.vi v24, v16, 4
; RV32-NEXT: vadd.vv v16, v16, v24
-; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: lui a5, 4112
-; RV32-NEXT: addi a4, a4, -241
-; RV32-NEXT: addi a5, a5, 257
-; RV32-NEXT: vsetvli a6, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v24, a4
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV32-NEXT: lui a3, 61681
+; RV32-NEXT: lui a4, 4112
+; RV32-NEXT: addi a3, a3, -241
+; RV32-NEXT: addi a4, a4, 257
+; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
-; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v24, a5
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.x v24, a4
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v16, v16, v24
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: vsrl.vx v16, v16, a3
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: vsrl.vx v16, v16, a2
; RV32-NEXT: bltu a0, a1, .LBB47_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB47_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsub.vx v24, v8, a2
+; RV32-NEXT: vadd.vi v24, v8, -1
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsrl.vi v24, v8, 1
@@ -2715,7 +2650,7 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsrl.vx v8, v8, a3
+; RV32-NEXT: vsrl.vx v8, v8, a2
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add sp, sp, a0
@@ -2727,64 +2662,63 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
; RV64-LABEL: vp_cttz_nxv16i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 1
-; RV64-NEXT: lui a3, 349525
-; RV64-NEXT: lui a4, 209715
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: sub a7, a0, a1
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: addiw t0, a5, -241
-; RV64-NEXT: addiw t1, a6, 257
-; RV64-NEXT: slli a6, a3, 32
-; RV64-NEXT: add a6, a3, a6
-; RV64-NEXT: slli a5, a4, 32
-; RV64-NEXT: add a5, a4, a5
+; RV64-NEXT: lui a2, 349525
+; RV64-NEXT: lui a3, 209715
+; RV64-NEXT: lui a4, 61681
+; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: sub a6, a0, a1
+; RV64-NEXT: addiw a2, a2, 1365
+; RV64-NEXT: addiw a3, a3, 819
+; RV64-NEXT: addiw a7, a4, -241
+; RV64-NEXT: addiw t0, a5, 257
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a5, a2, a5
+; RV64-NEXT: slli a4, a3, 32
+; RV64-NEXT: add a4, a3, a4
+; RV64-NEXT: slli a2, a7, 32
+; RV64-NEXT: add a2, a7, a2
; RV64-NEXT: slli a3, t0, 32
; RV64-NEXT: add a3, t0, a3
-; RV64-NEXT: slli a4, t1, 32
-; RV64-NEXT: add a4, t1, a4
-; RV64-NEXT: sltu t0, a0, a7
-; RV64-NEXT: addi t0, t0, -1
-; RV64-NEXT: and a7, t0, a7
-; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v16, a2
+; RV64-NEXT: sltu a7, a0, a6
+; RV64-NEXT: addi a7, a7, -1
+; RV64-NEXT: and a6, a7, a6
+; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; RV64-NEXT: vadd.vi v24, v16, -1
; RV64-NEXT: vnot.v v16, v16
; RV64-NEXT: vand.vv v16, v16, v24
; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vand.vx v24, v24, a6
+; RV64-NEXT: vand.vx v24, v24, a5
; RV64-NEXT: vsub.vv v16, v16, v24
-; RV64-NEXT: vand.vx v24, v16, a5
+; RV64-NEXT: vand.vx v24, v16, a4
; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a5
+; RV64-NEXT: vand.vx v16, v16, a4
; RV64-NEXT: vadd.vv v16, v24, v16
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: vand.vx v16, v16, a3
-; RV64-NEXT: vmul.vx v16, v16, a4
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsrl.vx v16, v16, a7
+; RV64-NEXT: vand.vx v16, v16, a2
+; RV64-NEXT: vmul.vx v16, v16, a3
+; RV64-NEXT: li a6, 56
+; RV64-NEXT: vsrl.vx v16, v16, a6
; RV64-NEXT: bltu a0, a1, .LBB47_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB47_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v8, a2
+; RV64-NEXT: vadd.vi v24, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v24
; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a6
+; RV64-NEXT: vand.vx v24, v24, a5
; RV64-NEXT: vsub.vv v8, v8, v24
-; RV64-NEXT: vand.vx v24, v8, a5
+; RV64-NEXT: vand.vx v24, v8, a4
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a5
+; RV64-NEXT: vand.vx v8, v8, a4
; RV64-NEXT: vadd.vv v8, v24, v8
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v24
-; RV64-NEXT: vand.vx v8, v8, a3
-; RV64-NEXT: vmul.vx v8, v8, a4
-; RV64-NEXT: vsrl.vx v8, v8, a7
+; RV64-NEXT: vand.vx v8, v8, a2
+; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vsrl.vx v8, v8, a6
; RV64-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked:
@@ -3080,9 +3014,8 @@ define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(
define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v12, v0.t
@@ -3111,12 +3044,11 @@ define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va,
define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vnot.v v12, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v12, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: li a0, 51
@@ -3143,9 +3075,8 @@ define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8_unmasked(
define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v16, v0.t
@@ -3174,12 +3105,11 @@ define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va,
define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vnot.v v16, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v16, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v16, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: li a0, 51
@@ -3436,9 +3366,8 @@ define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(
define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -3474,13 +3403,12 @@ define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va,
define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vnot.v v16, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v16, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: lui a0, 3
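From here the same rewrite is exercised on fixed-length vectors. The underlying transform is just x - C becoming x + (-C), which is legal for vadd.vi only when -C fits its 5-bit signed immediate; in these cttz tests C is 1, so the negated immediate is -1. A sketch of the legality check, under the assumption that simm5 means the usual [-16, 15] range (the function name is illustrative, not the patch's own):

#include <cassert>
#include <cstdint>

// x - C can become "vadd.vi x, -C" only if -C is a legal 5-bit immediate.
static bool subImmFoldsToVaddVi(int64_t C) {
  int64_t neg = -C;
  return neg >= -16 && neg <= 15; // simm5, so C may range over [-15, 16]
}

int main() {
  assert(subImmFoldsToVaddVi(1));    // x - 1  -> vadd.vi x, -1 (the cttz case)
  assert(subImmFoldsToVaddVi(16));   // x - 16 -> vadd.vi x, -16
  assert(!subImmFoldsToVaddVi(-16)); // +16 is not representable as simm5
}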
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index a3eaf37631481..96acd0aceab13 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -9,9 +9,8 @@ declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -34,12 +33,11 @@ define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i8> @vp_cttz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -61,9 +59,8 @@ declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -86,12 +83,11 @@ define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i8> @vp_cttz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -113,9 +109,8 @@ declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -138,12 +133,11 @@ define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i8> @vp_cttz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -165,9 +159,8 @@ declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -190,12 +183,11 @@ define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -217,9 +209,8 @@ declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -249,13 +240,12 @@ define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -283,9 +273,8 @@ declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -315,13 +304,12 @@ define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -349,9 +337,8 @@ declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -381,13 +368,12 @@ define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -415,9 +401,8 @@ declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -447,13 +432,12 @@ define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl
define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vnot.v v10, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v10, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 3
@@ -481,9 +465,8 @@ declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -514,13 +497,12 @@ define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v2i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 209715
@@ -549,9 +531,8 @@ declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -582,13 +563,12 @@ define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v4i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 209715
@@ -617,9 +597,8 @@ declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -650,13 +629,12 @@ define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v8i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vnot.v v10, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v10, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 209715
@@ -685,9 +663,8 @@ declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -718,13 +695,12 @@ define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_v16i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnot.v v12, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v12, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: lui a0, 209715
@@ -753,9 +729,8 @@ declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v9, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -795,39 +770,38 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
;
; RV64-LABEL: vp_cttz_v2i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v9, v8, -1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v9, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v9, v9, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: vand.vx v9, v8, a1, v0.t
+; RV64-NEXT: vand.vx v9, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
+; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: li a0, 56
-; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
%v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
@@ -837,18 +811,17 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vnot.v v9, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v9, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v9, v8
+; RV32-NEXT: vand.vv v8, v8, v9
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -879,38 +852,37 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
;
; RV64-LABEL: vp_cttz_v2i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v9, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vand.vx v9, v9, a0
+; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vand.vx v9, v8, a1
+; RV64-NEXT: vand.vx v9, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
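In the RV64 bodies above, all four 64-bit popcount masks are now built once each before the vsetvli: a lui/addiw pair produces the low 32 bits and a slli/add pair replicates them into the high half. A scalar model of that replication step (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// Models "slli a5, aN, 32; add aN, aN, a5": duplicate a 32-bit pattern into
// both halves of a 64-bit register. The addiw result is sign-extended, but
// all four patterns here have bit 31 clear, so the add cannot carry.
static uint64_t splatHalves(uint32_t pat) {
  uint64_t lo = pat;      // value after lui + addiw
  return lo + (lo << 32); // slli + add
}

int main() {
  assert(splatHalves(0x55555555u) == 0x5555555555555555ull);
  assert(splatHalves(0x01010101u) == 0x0101010101010101ull);
}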
@@ -923,9 +895,8 @@ declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v10, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -965,39 +936,38 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
;
; RV64-LABEL: vp_cttz_v4i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v10, v8, -1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v10, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
+; RV64-NEXT: vand.vx v10, v10, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a1, v0.t
+; RV64-NEXT: vand.vx v10, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
+; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: li a0, 56
-; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
%v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
@@ -1007,18 +977,17 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vnot.v v10, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v10, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v10, v8
+; RV32-NEXT: vand.vv v8, v8, v10
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -1049,38 +1018,37 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
;
; RV64-LABEL: vp_cttz_v4i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v10, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v10
; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vand.vx v10, v10, a0
+; RV64-NEXT: vand.vx v10, v10, a1
; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: vand.vx v10, v8, a1
+; RV64-NEXT: vand.vx v10, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
@@ -1093,9 +1061,8 @@ declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v12, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -1135,39 +1102,38 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
;
; RV64-LABEL: vp_cttz_v8i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v12, v8, -1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v12, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
+; RV64-NEXT: vand.vx v12, v12, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: vand.vx v12, v8, a1, v0.t
+; RV64-NEXT: vand.vx v12, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
+; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: li a0, 56
-; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
%v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
@@ -1177,18 +1143,17 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vnot.v v12, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v12, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -1219,38 +1184,37 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
;
; RV64-LABEL: vp_cttz_v8i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v12, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v12
; RV64-NEXT: vsrl.vi v12, v8, 1
-; RV64-NEXT: vand.vx v12, v12, a0
+; RV64-NEXT: vand.vx v12, v12, a1
; RV64-NEXT: vsub.vv v8, v8, v12
-; RV64-NEXT: vand.vx v12, v8, a1
+; RV64-NEXT: vand.vx v12, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v12, v8
; RV64-NEXT: vsrl.vi v12, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v12
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
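The RV32 bodies cannot materialize these 64-bit masks in a scalar register, which is why they toggle vsetvli between e32 and e64: the 32-bit pattern is splatted with vmv.v.x at SEW=32 and the same register group is then consumed by vand.vv at SEW=64, where each adjacent pair of e32 lanes reads as one repeating e64 constant. A little-endian model of that reinterpretation (the helper name is illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>

// Two adjacent e32 elements written by vmv.v.x, reread as one e64 element.
// Under the assumed little-endian lane layout this is exactly the repeated
// 64-bit mask, so no 64-bit scalar is ever needed on RV32.
static uint64_t e32SplatReadAsE64(uint32_t pat) {
  uint32_t lanes[2] = {pat, pat}; // what the SEW=32 splat leaves behind
  uint64_t out;
  std::memcpy(&out, lanes, sizeof(out)); // reinterpret, like changing SEW
  return out;
}

int main() {
  assert(e32SplatReadAsE64(0x33333333u) == 0x3333333333333333ull);
}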
immarg, <15 x i1>, i32) define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1305,39 +1268,38 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_cttz_v15i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) @@ -1347,18 +1309,17 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1389,38 +1350,37 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: 
vp_cttz_v15i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1433,9 +1393,8 @@ declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1475,39 +1434,38 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_cttz_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; 
RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -1517,18 +1475,17 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1559,38 +1516,37 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v16i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: 
vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1615,152 +1571,151 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB34_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; 
RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 
61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 @@ -1772,8 +1727,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1787,29 +1742,29 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1817,8 +1772,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1829,7 +1784,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -1857,75 +1812,74 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: bltu a0, a1, .LBB34_2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a4, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: addiw a7, a5, -241 -; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a2, 32 -; RV64-NEXT: add a6, a2, a6 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: add a5, a3, a5 -; RV64-NEXT: slli a2, a7, 32 -; RV64-NEXT: add a2, a7, a2 -; RV64-NEXT: slli a3, t0, 32 -; RV64-NEXT: add a3, t0, a3 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a5, a3, -241 +; RV64-NEXT: addiw a6, a4, 257 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a4, a1, a4 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a3, a2, a3 +; RV64-NEXT: slli a1, a5, 32 +; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: slli a2, a6, 32 +; RV64-NEXT: add a2, a6, a2 +; RV64-NEXT: addi a5, a0, -16 +; RV64-NEXT: sltu a0, a0, a5 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a7, a0, a7 +; RV64-NEXT: and a5, a0, a5 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; 
RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a6, sp, 16 +; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 16 +; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1952,69 +1906,78 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a2, 1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v24, v8, -1 ; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: and a0, 
a0, a2 +; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a2 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vand.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vsub.vv v24, v24, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v0, v16, a2 +; RV32-NEXT: vadd.vi v0, v16, -1 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vand.vv v0, v16, v0 -; RV32-NEXT: vsrl.vi v16, v0, 1 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v24, v16 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v24, v0, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v0, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v0, v24, v16 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vadd.vv v16, v0, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2025,7 +1988,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v16, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -2040,70 +2003,69 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB35_2: -; RV64-NEXT: li a2, 1 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vnot.v v24, v8 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 
-; RV64-NEXT: lui a5, 61681
-; RV64-NEXT: lui a6, 4112
-; RV64-NEXT: addiw a3, a3, 1365
-; RV64-NEXT: addiw a4, a4, 819
-; RV64-NEXT: addiw a5, a5, -241
-; RV64-NEXT: addiw a6, a6, 257
-; RV64-NEXT: slli a7, a3, 32
-; RV64-NEXT: add a3, a3, a7
-; RV64-NEXT: slli a7, a4, 32
-; RV64-NEXT: add a4, a4, a7
-; RV64-NEXT: slli a7, a5, 32
-; RV64-NEXT: add a5, a5, a7
-; RV64-NEXT: slli a7, a6, 32
-; RV64-NEXT: add a6, a6, a7
-; RV64-NEXT: addi a7, a0, -16
-; RV64-NEXT: sltu a0, a0, a7
+; RV64-NEXT: vadd.vi v24, v8, -1
+; RV64-NEXT: vnot.v v8, v8
+; RV64-NEXT: lui a2, 349525
+; RV64-NEXT: lui a3, 209715
+; RV64-NEXT: lui a4, 61681
+; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: addiw a2, a2, 1365
+; RV64-NEXT: addiw a3, a3, 819
+; RV64-NEXT: addiw a4, a4, -241
+; RV64-NEXT: addiw a5, a5, 257
+; RV64-NEXT: slli a6, a2, 32
+; RV64-NEXT: add a2, a2, a6
+; RV64-NEXT: slli a6, a3, 32
+; RV64-NEXT: add a3, a3, a6
+; RV64-NEXT: slli a6, a4, 32
+; RV64-NEXT: add a4, a4, a6
+; RV64-NEXT: slli a6, a5, 32
+; RV64-NEXT: add a5, a5, a6
+; RV64-NEXT: addi a6, a0, -16
+; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a0, a7
-; RV64-NEXT: li a7, 56
-; RV64-NEXT: vsub.vx v8, v8, a2
-; RV64-NEXT: vand.vv v8, v24, v8
+; RV64-NEXT: and a0, a0, a6
+; RV64-NEXT: li a6, 56
+; RV64-NEXT: vand.vv v8, v8, v24
; RV64-NEXT: vsrl.vi v24, v8, 1
-; RV64-NEXT: vand.vx v24, v24, a3
+; RV64-NEXT: vand.vx v24, v24, a2
; RV64-NEXT: vsub.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsub.vx v24, v16, a2
+; RV64-NEXT: vadd.vi v24, v16, -1
; RV64-NEXT: vnot.v v16, v16
; RV64-NEXT: vand.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vand.vx v24, v8, a4
+; RV64-NEXT: vand.vx v24, v8, a3
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a4
+; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vadd.vv v8, v24, v8
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
-; RV64-NEXT: vand.vx v24, v24, a3
+; RV64-NEXT: vand.vx v24, v24, a2
; RV64-NEXT: vsub.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vand.vx v24, v16, a4
+; RV64-NEXT: vand.vx v24, v16, a3
; RV64-NEXT: vsrl.vi v16, v16, 2
-; RV64-NEXT: vand.vx v16, v16, a4
+; RV64-NEXT: vand.vx v16, v16, a3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vand.vx v8, v8, a5
+; RV64-NEXT: vand.vx v8, v8, a4
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vadd.vv v16, v24, v16
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vmul.vx v8, v8, a6
+; RV64-NEXT: vmul.vx v8, v8, a5
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT: vsrl.vx v8, v8, a7
+; RV64-NEXT: vsrl.vx v8, v8, a6
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vadd.vv v16, v16, v24
-; RV64-NEXT: vand.vx v16, v16, a5
-; RV64-NEXT: vmul.vx v16, v16, a6
-; RV64-NEXT: vsrl.vx v16, v16, a7
+; RV64-NEXT: vand.vx v16, v16, a4
+; RV64-NEXT: vmul.vx v16, v16, a5
+; RV64-NEXT: vsrl.vx v16, v16, a6
; RV64-NEXT: ret
%v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl)
ret <32 x i64> %v
@@ -2112,9 +2074,8 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -2137,12 +2098,11 @@ define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext
define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -2162,9 +2122,8 @@ define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl
define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -2187,12 +2146,11 @@ define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext
define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -2212,9 +2170,8 @@ define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl
define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -2237,12 +2194,11 @@ define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext
define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -2262,9 +2218,8 @@ define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl
define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
@@ -2287,12 +2242,11 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zero
define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i8_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v9, v8
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
@@ -2312,9 +2266,8 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2344,13 +2297,12 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroe
define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -2376,9 +2328,8 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2408,13 +2359,12 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroe
define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -2440,9 +2390,8 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2472,13 +2421,12 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroe
define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 3
@@ -2504,9 +2452,8 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2536,13 +2483,12 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 z
define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vnot.v v10, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v10, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 3
@@ -2568,9 +2514,8 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2601,13 +2546,12 @@ define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroe
define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 209715
@@ -2634,9 +2578,8 @@ define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %
define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2667,13 +2610,12 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroe
define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vnot.v v9, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v9, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v9
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: lui a0, 209715
@@ -2700,9 +2642,8 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2733,13 +2674,12 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroe
define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v8i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vnot.v v10, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v10, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v10
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v10, v8
; CHECK-NEXT: vsrl.vi v10, v8, 1
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: lui a0, 209715
@@ -2766,9 +2706,8 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t
+; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: addi a0, a0, 1365
@@ -2799,13 +2738,12 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 z
define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnot.v v12, v8
-; CHECK-NEXT: vsub.vx v8, v8, a1
+; CHECK-NEXT: vadd.vi v12, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 349525
+; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: lui a0, 209715
@@ -2832,9 +2770,8 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vsub.vx v9, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v9, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -2874,39 +2811,38 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
;
; RV64-LABEL: vp_cttz_zero_undef_v2i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1, v0.t
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v9, v8, -1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v9, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v9, v9, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT: vand.vx v9, v8, a1, v0.t
+; RV64-NEXT: vand.vx v9, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
+; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: li a0, 56
-; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
%v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl)
@@ -2916,18 +2852,17 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vnot.v v9, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v9, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT: vand.vv v8, v9, v8
+; RV32-NEXT: vand.vv v8, v8, v9
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v9, v8, 1
; RV32-NEXT: vand.vv v9, v9, v10
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -2958,38 +2893,37 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
;
; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT: vsub.vx v9, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v9, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v9
; RV64-NEXT: vsrl.vi v9, v8, 1
-; RV64-NEXT: vand.vx v9, v9, a0
+; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vand.vx v9, v8, a1
+; RV64-NEXT: vand.vx v9, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v9, v8
; RV64-NEXT: vsrl.vi v9, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
@@ -3000,9 +2934,8 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v4i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vsub.vx v10, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v10, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -3042,39 +2975,38 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
;
; RV64-LABEL: vp_cttz_zero_undef_v4i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1, v0.t
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v10, v8, -1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v10, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT: vand.vx v10, v10, a0, v0.t
+; RV64-NEXT: vand.vx v10, v10, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT: vand.vx v10, v8, a1, v0.t
+; RV64-NEXT: vand.vx v10, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
+; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v10, v8, v0.t
; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: li a0, 56
-; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
%v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl)
@@ -3084,18 +3016,17 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vnot.v v10, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v10, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT: vand.vv v8, v10, v8
+; RV32-NEXT: vand.vv v8, v8, v10
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v10, v8, 1
; RV32-NEXT: vand.vv v10, v10, v12
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -3126,38 +3057,37 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
;
; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT: vsub.vx v10, v8, a1
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v10, v8, -1
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v10
; RV64-NEXT: vsrl.vi v10, v8, 1
-; RV64-NEXT: vand.vx v10, v10, a0
+; RV64-NEXT: vand.vx v10, v10, a1
; RV64-NEXT: vsub.vv v8, v8, v10
-; RV64-NEXT: vand.vx v10, v8, a1
+; RV64-NEXT: vand.vx v10, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v10, v8
; RV64-NEXT: vsrl.vi v10, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v10
-; RV64-NEXT: vand.vx v8, v8, a2
-; RV64-NEXT: vmul.vx v8, v8, a3
+; RV64-NEXT: vand.vx v8, v8, a3
+; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
@@ -3168,9 +3098,8 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vsub.vx v12, v8, a1, v0.t
+; RV32-NEXT: vadd.vi v12, v8, -1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -3210,39 +3139,38 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
;
; RV64-LABEL: vp_cttz_zero_undef_v8i64:
; RV64: # %bb.0:
-; RV64-NEXT: li a1, 1
-; RV64-NEXT: lui a2, 349525
-; RV64-NEXT: lui a3, 209715
-; RV64-NEXT: lui a4, 61681
-; RV64-NEXT: lui a5, 4112
+; RV64-NEXT: lui a1, 349525
+; RV64-NEXT: lui a2, 209715
+; RV64-NEXT: lui a3, 61681
+; RV64-NEXT: lui a4, 4112
+; RV64-NEXT: addiw a1, a1, 1365
+; RV64-NEXT: addiw a2, a2, 819
+; RV64-NEXT: addiw a3, a3, -241
+; RV64-NEXT: addiw a4, a4, 257
+; RV64-NEXT: slli a5, a1, 32
+; RV64-NEXT: add a1, a1, a5
+; RV64-NEXT: slli a5, a2, 32
+; RV64-NEXT: add a2, a2, a5
+; RV64-NEXT: slli a5, a3, 32
+; RV64-NEXT: add a3, a3, a5
+; RV64-NEXT: slli a5, a4, 32
+; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT: vsub.vx v12, v8, a1, v0.t
-; RV64-NEXT: addiw a0, a2, 1365
-; RV64-NEXT: addiw a1, a3, 819
-; RV64-NEXT: addiw a2, a4, -241
-; RV64-NEXT: addiw a3, a5, 257
-; RV64-NEXT: slli a4, a0, 32
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: slli a4, a1, 32
-; RV64-NEXT: add a1, a1, a4
-; RV64-NEXT: slli a4, a2, 32
-; RV64-NEXT: add a2, a2, a4
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: add a3, a3, a4
+; RV64-NEXT: vadd.vi v12, v8, -1, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vand.vv v8, v8, v12, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT: vand.vx v12, v12, a0, v0.t
+; RV64-NEXT: vand.vx v12, v12, a1, v0.t
; RV64-NEXT: vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT: vand.vx v12, v8, a1, v0.t
+; RV64-NEXT: vand.vx v12, v8, a2, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT: vand.vx v8, v8, a1, v0.t
+; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vadd.vv v8, v12, v8, v0.t
; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT: vand.vx v8, v8, a2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a3, v0.t
; RV64-NEXT: li a0, 56
-; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
+; RV64-NEXT: vmul.vx v8, v8, a4, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
%v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl)
@@ -3252,18 +3180,17 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vnot.v v12, v8
-; RV32-NEXT: vsub.vx v8, v8, a1
+; RV32-NEXT: vadd.vi v12, v8, -1
+; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: lui a1, 209715
-; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsrl.vi v12, v8, 1
; RV32-NEXT: vand.vv v12, v12, v16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -3294,38 +3221,37 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
;
; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
; RV64: # %bb.0:
-; RV64-NEXT: li
a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v12, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 +; RV64-NEXT: vand.vx v12, v12, a1 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: vand.vx v12, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3336,9 +3262,8 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3378,39 +3303,38 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, 
v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl) @@ -3420,18 +3344,17 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3462,38 +3385,37 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; 
RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3504,9 +3426,8 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3546,39 +3467,38 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl) @@ -3588,18 +3508,17 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; 
RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3630,38 +3549,37 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3684,152 +3602,151 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB70_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB70_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB70_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: 
li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; 
RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, 
sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 @@ -3841,8 +3758,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3856,29 +3773,29 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3886,8 +3803,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3898,7 +3815,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -3926,75 +3843,74 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add 
a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: bltu a0, a1, .LBB70_2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB70_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a4, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB70_2: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: addiw a7, a5, -241 -; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a2, 32 -; RV64-NEXT: add a6, a2, a6 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: add a5, a3, a5 -; RV64-NEXT: slli a2, a7, 32 -; RV64-NEXT: add a2, a7, a2 -; RV64-NEXT: slli a3, t0, 32 -; RV64-NEXT: add a3, t0, a3 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a5, a3, -241 +; RV64-NEXT: addiw a6, a4, 257 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a4, a1, a4 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a3, a2, a3 +; RV64-NEXT: slli a1, a5, 32 +; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: slli a2, a6, 32 +; RV64-NEXT: add a2, a6, a2 +; RV64-NEXT: addi a5, a0, -16 +; RV64-NEXT: sltu a0, a0, a5 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a7, a0, a7 +; RV64-NEXT: and a5, a0, a5 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a6, sp, 16 +; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 16 +; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx 
v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4021,69 +3937,78 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a2, 1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v24, v8, -1 ; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a2 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vand.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vsub.vv v24, v24, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v0, v16, a2 +; RV32-NEXT: vadd.vi v0, v16, -1 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vand.vv v0, v16, v0 -; RV32-NEXT: vsrl.vi v16, v0, 1 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v24, v16 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; 
RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v24, v0, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v0, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v0, v24, v16 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vadd.vv v16, v0, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -4094,7 +4019,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v16, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -4109,70 +4034,69 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB71_2: -; RV64-NEXT: li a2, 1 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vnot.v v24, v8 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a3, 32 -; RV64-NEXT: add a3, a3, a7 -; RV64-NEXT: slli a7, a4, 32 -; RV64-NEXT: add a4, a4, a7 -; RV64-NEXT: slli a7, a5, 32 -; RV64-NEXT: add a5, a5, a7 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vadd.vi v24, v8, -1 +; RV64-NEXT: vnot.v v8, v8 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a2, a2, a6 +; RV64-NEXT: slli a6, a3, 32 +; RV64-NEXT: add a3, a3, a6 +; RV64-NEXT: slli a6, a4, 32 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: addi a6, a0, -16 +; RV64-NEXT: sltu a0, a0, a6 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsub.vx v8, v8, a2 -; RV64-NEXT: vand.vv v8, v24, v8 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: li a6, 56 +; RV64-NEXT: vand.vv v8, v8, v24 ; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v8, v8, v24 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v24, v16, a2 +; RV64-NEXT: vadd.vi v24, v16, -1 ; RV64-NEXT: vnot.v v16, v16 ; 
RV64-NEXT: vand.vv v16, v16, v24 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v24, v8, a4 +; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v16, v16, v24 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v24, v16, a4 +; RV64-NEXT: vand.vx v24, v16, a3 ; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vand.vx v16, v16, a3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v8, v8, a4 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v24, v16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmul.vx v8, v8, a6 +; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vx v8, v8, a7 +; RV64-NEXT: vsrl.vx v8, v8, a6 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: vmul.vx v16, v16, a6 -; RV64-NEXT: vsrl.vx v16, v16, a7 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vmul.vx v16, v16, a5 +; RV64-NEXT: vsrl.vx v16, v16, a6 ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 57e0eeb92ee2f..1724b92a9ab48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -13,9 +13,8 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -90,10 +89,9 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -168,10 +166,9 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -254,11 +251,10 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v9, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi 
v10, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v10 ; RV32I-NEXT: vsrl.vi v10, v8, 1 @@ -310,8 +306,7 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v9, v8, a5 +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -392,9 +387,8 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVI-NEXT: li a1, 32 ; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -472,10 +466,9 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -550,10 +543,9 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -636,11 +628,10 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v10, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v12 ; RV32I-NEXT: vsrl.vi v12, v8, 1 @@ -692,8 +683,7 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v10, v8, a5 +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 @@ -773,9 +763,8 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -845,10 +834,9 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -916,10 +904,9 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: 
vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -995,11 +982,10 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v9, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v10 ; RV32I-NEXT: vsrl.vi v10, v8, 1 @@ -1051,8 +1037,7 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v9, v8, a5 +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -1123,9 +1108,8 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { ; RVI-NEXT: li a1, 32 ; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -1198,10 +1182,9 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -1269,10 +1252,9 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -1348,11 +1330,10 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v10, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v12 ; RV32I-NEXT: vsrl.vi v12, v8, 1 @@ -1404,8 +1385,7 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v10, v8, a5 +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 39fd70beb9ee2..0436a27409f81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -4031,8 +4031,7 @@ define void @sub_vi_v16i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; 
CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x @@ -4046,8 +4045,7 @@ define void @sub_vi_v8i16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -4061,8 +4059,7 @@ define void @sub_vi_v4i32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, ptr %x @@ -4076,8 +4073,7 @@ define void @sub_vi_v2i64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll index 3a8d08f306a51..f3ad54afa21c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll @@ -39,9 +39,8 @@ define @i1_zext_add_commuted( %a, @i1_zext_sub( %a, %b) { ; CHECK-LABEL: i1_zext_sub: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu -; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %zext = zext %a to %sub = sub %b, %zext diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll index 0bd82e654e021..d34b401529202 100644 --- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll @@ -75,9 +75,8 @@ define @test_urem_vec_odd_divisor_eq0( %x) define @test_urem_vec_even_divisor_eq1( %x) nounwind { ; RV32-LABEL: test_urem_vec_even_divisor_eq1: ; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a0 +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vadd.vi v8, v8, -1 ; RV32-NEXT: lui a0, 1048571 ; RV32-NEXT: addi a0, a0, -1365 ; RV32-NEXT: vmul.vx v8, v8, a0 @@ -93,9 +92,8 @@ define @test_urem_vec_even_divisor_eq1( %x) ; ; RV64-LABEL: test_urem_vec_even_divisor_eq1: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1 ; RV64-NEXT: lui a0, 1048571 ; RV64-NEXT: addi a0, a0, -1365 ; RV64-NEXT: vmul.vx v8, v8, a0 @@ -117,9 +115,8 @@ define @test_urem_vec_even_divisor_eq1( %x) define @test_urem_vec_odd_divisor_eq1( %x) nounwind { ; RV32-LABEL: test_urem_vec_odd_divisor_eq1: ; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a0 +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vadd.vi v8, v8, -1 ; RV32-NEXT: lui a0, 1048573 ; RV32-NEXT: addi a0, a0, -819 ; RV32-NEXT: vmul.vx v8, v8, a0 @@ -132,9 +129,8 @@ define @test_urem_vec_odd_divisor_eq1( %x) ; ; RV64-LABEL: test_urem_vec_odd_divisor_eq1: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, 
ma -; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1 ; RV64-NEXT: lui a0, 1048573 ; RV64-NEXT: addi a0, a0, -819 ; RV64-NEXT: vmul.vx v8, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index 58b6f0253b99a..d2ef711fc0d74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -156,7 +156,6 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vlm.v v10, (a2) ; NO_FOLDING-NEXT: vmv.v.i v11, 0 -; NO_FOLDING-NEXT: li a0, 1 ; NO_FOLDING-NEXT: vmv.v.v v0, v8 ; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; NO_FOLDING-NEXT: vmv.v.v v0, v9 @@ -166,7 +165,7 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 ; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 ; NO_FOLDING-NEXT: vmv.v.v v0, v8 -; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; NO_FOLDING-NEXT: vor.vv v8, v9, v10 ; NO_FOLDING-NEXT: vor.vv v8, v8, v11 ; NO_FOLDING-NEXT: ret @@ -178,7 +177,6 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vlm.v v10, (a2) ; FOLDING-NEXT: vmv.v.i v11, 0 -; FOLDING-NEXT: li a0, 1 ; FOLDING-NEXT: vmv.v.v v0, v8 ; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; FOLDING-NEXT: vmv.v.v v0, v9 @@ -188,7 +186,7 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; FOLDING-NEXT: vmul.vv v9, v12, v9 ; FOLDING-NEXT: vsub.vv v11, v12, v10 ; FOLDING-NEXT: vmv.v.v v0, v8 -; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; FOLDING-NEXT: vor.vv v8, v9, v10 ; FOLDING-NEXT: vor.vv v8, v8, v11 ; FOLDING-NEXT: ret @@ -214,7 +212,6 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vlm.v v10, (a2) ; NO_FOLDING-NEXT: vmv.v.i v11, 0 -; NO_FOLDING-NEXT: li a0, 1 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 ; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; NO_FOLDING-NEXT: vmv1r.v v0, v9 @@ -224,7 +221,7 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 ; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; NO_FOLDING-NEXT: vor.vv v8, v9, v10 ; NO_FOLDING-NEXT: vor.vv v8, v8, v11 ; NO_FOLDING-NEXT: ret @@ -236,7 +233,6 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vlm.v v10, (a2) ; FOLDING-NEXT: vmv.v.i v11, 0 -; FOLDING-NEXT: li a0, 1 ; FOLDING-NEXT: vmv1r.v v0, v8 ; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; FOLDING-NEXT: vmv1r.v v0, v9 @@ -246,7 +242,7 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING-NEXT: vmul.vv v9, v12, v9 ; FOLDING-NEXT: vsub.vv v11, v12, v10 ; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; FOLDING-NEXT: vor.vv v8, v9, v10 ; FOLDING-NEXT: vor.vv v8, v8, v11 ; FOLDING-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll index b43de0de49514..7442be92fffcd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll @@ 
-853,9 +853,8 @@ define @vsub_vx_mask_nxv8i32( %va, i32 sign define @vsub_vi_nxv8i32_one( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_one: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 1) ret %vc @@ -864,9 +863,8 @@ define @vsub_vi_nxv8i32_one( %va) { define @vsub_vi_nxv8i32_minusone( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_minusone: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 -1) ret %vc @@ -875,9 +873,8 @@ define @vsub_vi_nxv8i32_minusone( %va) { define @vsub_vi_nxv8i32_15( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_15: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 15 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -15 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 15) ret %vc @@ -886,9 +883,8 @@ define @vsub_vi_nxv8i32_15( %va) { define @vsub_vi_nxv8i32_16( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -16 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 16) ret %vc @@ -897,9 +893,8 @@ define @vsub_vi_nxv8i32_16( %va) { define @vsub_vi_nxv8i32_minus15( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_minus15: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -15 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 15 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 -15) ret %vc @@ -919,9 +914,8 @@ define @vsub_vi_nxv8i32_minus16( %va) { define @vsub_vi_mask_nxv8i32( %va, %mask) { ; CHECK-LABEL: vsub_vi_mask_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 7 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, -7, v0.t ; CHECK-NEXT: ret %vs = select %mask, splat (i32 7), zeroinitializer %vc = sub %va, %vs
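; NOTE (illustrative aside, not part of the patch): the new patterns rewrite a
; subtract of a splatted constant C as vadd.vi with immediate -C whenever -C
; fits in simm5 ([-16, 15]), i.e. for C in [-15, 16]. A minimal sketch in the
; same style as the tests above (the values here are assumptions chosen to
; show the boundary, not existing checks in this file):
;
;   %x = sub <vscale x 8 x i32> %va, splat (i32 16)    ; folds: vadd.vi v8, v8, -16
;   %y = sub <vscale x 8 x i32> %va, splat (i32 -16)   ; kept as vsub.vx: +16 exceeds simm5
;
; This matches the updated checks: C = 1, 7, 15, 16, -1, and -15 all become
; vadd.vi, while the minus-16 case is the one subtract left untouched.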