From 33314693f536a66390b0e022f2f00be94de5b06e Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 10 May 2023 10:49:30 -0700
Subject: [PATCH] Revert "[RISCV][InsertVSETVLI] Avoid VL toggles for extractelement patterns"

This reverts commit 657d20dc75252f0c8415ada5214affccc3c98efe. A correctness
problem was reported against the review and the fix warrants re-review.
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  |  38 ---
 llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll  |   4 +
 .../RISCV/rvv/fixed-vector-shuffle-reverse.ll |   2 +
 .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll    |   2 +-
 .../RISCV/rvv/fixed-vectors-extract-i1.ll     |   2 +
 .../rvv/fixed-vectors-extract-subvector.ll    |   1 +
 .../RISCV/rvv/fixed-vectors-extract.ll        |  34 ++-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     |  62 ++---
 .../RISCV/rvv/fixed-vectors-fp2i-sat.ll       |   6 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll   |  24 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll   |  16 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    |  54 ++--
 .../CodeGen/RISCV/rvv/fixed-vectors-load.ll   |   6 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  | 255 +++++++++++++-----
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 237 +++++++++++-----
 .../CodeGen/RISCV/rvv/fixed-vectors-store.ll  |   8 +-
 .../test/CodeGen/RISCV/rvv/fold-vector-cmp.ll |   1 +
 .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll |  14 +
 llvm/test/CodeGen/RISCV/rvv/vector-splice.ll  |   8 +
 llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll     |   2 +
 .../CodeGen/RISCV/urem-seteq-illegal-types.ll |   2 +
 21 files changed, 510 insertions(+), 268 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index faffe3d12fa39..76d0974e70371 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -85,18 +85,6 @@ static bool isScalarMoveInstr(const MachineInstr &MI) {
   }
 }
 
-static bool isVSlideInstr(const MachineInstr &MI) {
-  switch (getRVVMCOpcode(MI.getOpcode())) {
-  default:
-    return false;
-  case RISCV::VSLIDEDOWN_VX:
-  case RISCV::VSLIDEDOWN_VI:
-  case RISCV::VSLIDEUP_VX:
-  case RISCV::VSLIDEUP_VI:
-    return true;
-  }
-}
-
 /// Get the EEW for a load or store instruction. Return std::nullopt if MI is
 /// not a load or store which ignores SEW.
 static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
@@ -830,11 +818,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
       .addImm(Info.encodeVTYPE());
 }
 
-static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
-  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
-  return Fractional || LMul == 1;
-}
-
 /// Return true if a VSETVLI is required to transition from CurInfo to Require
 /// before MI.
 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
@@ -862,27 +845,6 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
     }
   }
 
-  // A slidedown/slideup with a VL of 1 whose destination is an IMPLICIT_DEF
-  // can use any VL/SEW combination which writes at least the first element.
-  // Notes:
-  // * VL=1 is special only because we have existing support for zero vs
-  //   non-zero VL. We could generalize this if we had a VL > C predicate.
-  // * The LMUL1 restriction is for machines whose latency may depend on VL.
-  // * As above, this is only legal for IMPLICIT_DEF, not TA.
- if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 && - isLMUL1OrSmaller(CurInfo.getVLMUL())) { - auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg()); - if (VRegDef && VRegDef->isImplicitDef() && - CurInfo.getSEW() >= Require.getSEW()) { - Used.VLAny = false; - Used.VLZeroness = true; - Used.SEW = false; - Used.LMUL = false; - Used.SEWLMULRatio = false; - Used.TailPolicy = false; - } - } - if (CurInfo.isCompatible(Used, Require)) return false; diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 2210ed4720c5a..353f8119bd331 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -10,6 +10,7 @@ define i1 @extractelt_nxv1i1(* %x, i64 %idx) nounwind { ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -27,6 +28,7 @@ define i1 @extractelt_nxv2i1(* %x, i64 %idx) nounwind { ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -44,6 +46,7 @@ define i1 @extractelt_nxv4i1(* %x, i64 %idx) nounwind { ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -61,6 +64,7 @@ define i1 @extractelt_nxv8i1(* %x, i64 %idx) nounwind { ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll index 99d7b4963db76..dd5921313e611 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll @@ -16,7 +16,9 @@ define <2 x i1> @reverse_v2i1(<2 x i1> %a) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vslideup.vi v9, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll index 29d48fb3f41a6..f11a7b44edaab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -59,9 +59,9 @@ define void @abs_v6i16(ptr %x) { ; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x i16>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index 
2ee0e44253d63..6bde962dac0a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -30,6 +30,7 @@ define i1 @extractelt_v2i1(ptr %x, i64 %idx) nounwind { ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -47,6 +48,7 @@ define i1 @extractelt_v4i1(ptr %x, i64 %idx) nounwind { ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll index 1913d144139fb..a8bd8dbb5a08a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -248,6 +248,7 @@ define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vlm.v v8, (a0) +; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX1-NEXT: vslidedown.vi v8, v8, 1 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; LMULMAX1-NEXT: vsm.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 16c7611a84430..b860916a3adf0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -9,6 +9,7 @@ define i8 @extractelt_v16i8(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -22,6 +23,7 @@ define i16 @extractelt_v8i16(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -35,6 +37,7 @@ define i32 @extractelt_v4i32(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -71,6 +74,7 @@ define half @extractelt_v8f16(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -84,6 +88,7 @@ define float @extractelt_v4f32(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -247,6 +252,7 @@ define i8 @extractelt_v16i8_idx(ptr %x, i32 zeroext %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -260,6 +266,7 
@@ define i16 @extractelt_v8i16_idx(ptr %x, i32 zeroext %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -274,6 +281,7 @@ define i32 @extractelt_v4i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -289,10 +297,10 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret @@ -302,6 +310,7 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vx v8, v8, a1 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -317,6 +326,7 @@ define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -332,6 +342,7 @@ define float @extractelt_v4f32_idx(ptr %x, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -347,6 +358,7 @@ define double @extractelt_v2f64_idx(ptr %x, i32 zeroext %idx) nounwind { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8 +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -517,8 +529,8 @@ define void @store_extractelt_v16i8(ptr %x, ptr %p) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x @@ -532,8 +544,8 @@ define void @store_extractelt_v8i16(ptr %x, ptr %p) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vse16.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -547,8 +559,8 @@ define void @store_extractelt_v4i32(ptr %x, ptr %p) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vse32.v 
v8, (a1) ; CHECK-NEXT: ret %a = load <4 x i32>, ptr %x @@ -563,9 +575,9 @@ define void @store_extractelt_v2i64(ptr %x, ptr %p) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v9, v8, a0 ; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: vmv.x.s a2, v8 @@ -577,8 +589,8 @@ define void @store_extractelt_v2i64(ptr %x, ptr %p) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: ret %a = load <2 x i64>, ptr %x @@ -592,8 +604,8 @@ define void @store_extractelt_v2f64(ptr %x, ptr %p) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x @@ -615,6 +627,7 @@ define i32 @extractelt_add_v4i32(<4 x i32> %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vadd.vi v8, v8, 13 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -637,6 +650,7 @@ define i32 @extractelt_sub_v4i32(<4 x i32> %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vrsub.vi v8, v8, 13 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -651,6 +665,7 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) { ; RV32NOM-NEXT: li a0, 13 ; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32NOM-NEXT: vmul.vx v8, v8, a0 +; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32NOM-NEXT: vslidedown.vi v8, v8, 2 ; RV32NOM-NEXT: vmv.x.s a0, v8 ; RV32NOM-NEXT: ret @@ -669,6 +684,7 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) { ; RV64-NEXT: li a0, 13 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmul.vx v8, v8, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -696,6 +712,7 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) { ; RV32NOM-NEXT: vsra.vv v9, v8, v11 ; RV32NOM-NEXT: vsrl.vi v8, v8, 31 ; RV32NOM-NEXT: vadd.vv v8, v9, v8 +; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32NOM-NEXT: vslidedown.vi v8, v8, 2 ; RV32NOM-NEXT: vmv.x.s a0, v8 ; RV32NOM-NEXT: ret @@ -731,6 +748,7 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) { ; RV64-NEXT: vsra.vv v8, v8, v11 ; RV64-NEXT: vsrl.vi v9, v8, 31 ; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -747,6 +765,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) { ; RV32NOM-NEXT: lui a0, 322639 ; RV32NOM-NEXT: addi a0, a0, -945 ; RV32NOM-NEXT: vmulhu.vx v8, v8, a0 +; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32NOM-NEXT: vslidedown.vi v8, v8, 2 ; RV32NOM-NEXT: vmv.x.s a0, v8 ; RV32NOM-NEXT: srli a0, a0, 2 @@ -771,6 +790,7 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) { ; RV64-NEXT: addiw a0, a0, -945 ; RV64-NEXT: vmulhu.vx v8, v8, a0 ; RV64-NEXT: vsrl.vi v8, 
v8, 2 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 3e11193f9b6c4..135844362dbae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -47,9 +47,9 @@ define void @fadd_v6f16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -130,9 +130,9 @@ define void @fsub_v6f16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -213,9 +213,9 @@ define void @fmul_v6f16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -296,9 +296,9 @@ define void @fdiv_v6f16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -375,9 +375,9 @@ define void @fneg_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfneg.v v8, v8 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -450,9 +450,9 @@ define void @fabs_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfabs.v v8, v8 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -532,9 +532,9 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, 
e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -616,9 +616,9 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfsgnj.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -701,9 +701,9 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -776,9 +776,9 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) ; LMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8 ; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v9, v10 +; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1-RV32-NEXT: addi a1, a0, 4 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1-RV32-NEXT: vse16.v v9, (a1) ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) @@ -792,9 +792,9 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 ; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v10 +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1-RV64-NEXT: addi a1, a0, 4 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1-RV64-NEXT: vse16.v v9, (a1) ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) @@ -865,9 +865,9 @@ define void @sqrt_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfsqrt.v v8, v8 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -951,9 +951,9 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { ; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -1047,9 +1047,9 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { ; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) +; LMULMAX1-RV64-NEXT: 
vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -1878,9 +1878,9 @@ define void @fadd_vf_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -1960,9 +1960,9 @@ define void @fadd_fv_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2042,9 +2042,9 @@ define void @fsub_vf_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfsub.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2124,9 +2124,9 @@ define void @fsub_fv_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfrsub.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2206,9 +2206,9 @@ define void @fmul_vf_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2288,9 +2288,9 @@ define void @fmul_fv_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2370,9 +2370,9 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfdiv.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, 
v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2452,9 +2452,9 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) { ; LMULMAX1-RV64-NEXT: vfrdiv.vf v8, v8, fa0 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2538,9 +2538,9 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) { ; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2629,9 +2629,9 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) { ; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2721,9 +2721,9 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { ; LMULMAX1-RV64-NEXT: vfmsac.vf v9, fa0, v8 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2871,9 +2871,9 @@ define void @trunc_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -2990,9 +2990,9 @@ define void @ceil_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -3113,9 +3113,9 @@ define void @floor_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; 
LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -3236,9 +3236,9 @@ define void @round_v6f16(ptr %x) { ; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -3478,9 +3478,9 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x @@ -3574,9 +3574,9 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) +; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 ; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = load <6 x half>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index 9b46d4441a21f..608ea4b6f345b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -191,6 +191,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v9, v8, 1 ; RV32-NEXT: vfmv.f.s fa5, v9 ; RV32-NEXT: lui a0, %hi(.LCPI10_0) @@ -210,7 +211,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa5, fa5, fa3 ; RV32-NEXT: fcvt.w.d a3, fa5, rtz ; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vse8.v v8, (a1) @@ -220,6 +221,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vi v9, v8, 1 ; RV64-NEXT: vfmv.f.s fa5, v9 ; RV64-NEXT: lui a0, %hi(.LCPI10_0) @@ -239,7 +241,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa5, fa5, fa3 ; RV64-NEXT: fcvt.l.d a3, fa5, rtz ; RV64-NEXT: and a2, a2, a3 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV64-NEXT: vslide1down.vx v8, v8, a2 ; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: vse8.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index 88a5dbb1386a9..f453ba921f5cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -84,9 +84,9 @@ define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vle32.v v8, (a0) ; LMULMAX8RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v9, (a0) ; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v8, (a1) @@ -97,9 +97,9 @@ define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vle32.v v8, (a0) ; LMULMAX8RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vse32.v v9, (a0) ; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX8RV64-NEXT: vse64.v v8, (a1) @@ -110,9 +110,9 @@ define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 +; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v8, (a1) @@ -123,9 +123,9 @@ define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 +; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1RV64-NEXT: vse64.v v8, (a1) @@ -142,9 +142,9 @@ define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vle32.v v8, (a0) ; LMULMAX8RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v9, (a0) ; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v8, (a1) @@ -155,9 +155,9 @@ define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vle32.v v8, (a0) ; LMULMAX8RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vse32.v v9, (a0) ; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX8RV64-NEXT: vse64.v v8, (a1) @@ -168,9 +168,9 @@ define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 ; 
LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v8, (a1) @@ -181,9 +181,9 @@ define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { ; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1RV64-NEXT: vse64.v v8, (a1) @@ -212,6 +212,7 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; LMULMAX8RV32: # %bb.0: ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; LMULMAX8RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX8RV32-NEXT: vmv.x.s a1, v8 ; LMULMAX8RV32-NEXT: slli a2, a1, 17 @@ -239,6 +240,7 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; LMULMAX8RV64-NEXT: lui a2, 8 ; LMULMAX8RV64-NEXT: addiw a2, a2, -1 ; LMULMAX8RV64-NEXT: and a1, a1, a2 +; LMULMAX8RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 1 ; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX8RV64-NEXT: and a2, a3, a2 @@ -258,6 +260,7 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; LMULMAX1RV32: # %bb.0: ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; LMULMAX1RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1RV32-NEXT: vmv.x.s a1, v8 ; LMULMAX1RV32-NEXT: slli a2, a1, 17 @@ -285,6 +288,7 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; LMULMAX1RV64-NEXT: lui a2, 8 ; LMULMAX1RV64-NEXT: addiw a2, a2, -1 ; LMULMAX1RV64-NEXT: and a1, a1, a2 +; LMULMAX1RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 1 ; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX1RV64-NEXT: and a2, a3, a2 @@ -309,6 +313,7 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; LMULMAX8RV32: # %bb.0: ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; LMULMAX8RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX8RV32-NEXT: vmv.x.s a1, v8 ; LMULMAX8RV32-NEXT: slli a2, a1, 17 @@ -336,6 +341,7 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; LMULMAX8RV64-NEXT: lui a2, 16 ; LMULMAX8RV64-NEXT: addiw a2, a2, -1 ; LMULMAX8RV64-NEXT: and a1, a1, a2 +; LMULMAX8RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 1 ; LMULMAX8RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX8RV64-NEXT: and a2, a3, a2 @@ -355,6 +361,7 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; LMULMAX1RV32: # %bb.0: ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vfncvt.rtz.x.f.w v9, v8 +; LMULMAX1RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 2 ; LMULMAX1RV32-NEXT: vmv.x.s a1, v8 ; LMULMAX1RV32-NEXT: slli a2, a1, 17 @@ -382,6 +389,7 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; LMULMAX1RV64-NEXT: lui a2, 16 ; LMULMAX1RV64-NEXT: addiw a2, a2, -1 ; LMULMAX1RV64-NEXT: and a1, a1, a2 +; LMULMAX1RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; 
LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 1 ; LMULMAX1RV64-NEXT: vmv.x.s a3, v8 ; LMULMAX1RV64-NEXT: and a2, a3, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index b60f9405a760f..65603e634c72f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -90,9 +90,9 @@ define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vle32.v v8, (a0) ; LMULMAX8RV32-NEXT: vfcvt.f.x.v v8, v8 +; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v9, (a0) ; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v8, (a1) @@ -103,9 +103,9 @@ define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vle32.v v8, (a0) ; LMULMAX8RV64-NEXT: vfcvt.f.x.v v8, v8 +; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vse32.v v9, (a0) ; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX8RV64-NEXT: vse64.v v8, (a1) @@ -116,9 +116,9 @@ define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1RV32-NEXT: vfcvt.f.x.v v8, v8 +; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v8, (a1) @@ -129,9 +129,9 @@ define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1RV64-NEXT: vfcvt.f.x.v v8, v8 +; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1RV64-NEXT: vse64.v v8, (a1) @@ -148,9 +148,9 @@ define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vle32.v v8, (a0) ; LMULMAX8RV32-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v9, (a0) ; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX8RV32-NEXT: vse32.v v8, (a1) @@ -161,9 +161,9 @@ define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vle32.v v8, (a0) ; LMULMAX8RV64-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX8RV64-NEXT: vse32.v v9, (a0) ; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX8RV64-NEXT: vse64.v v8, (a1) @@ -174,9 
+174,9 @@ define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1RV32-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX1RV32-NEXT: vse32.v v8, (a1) @@ -187,9 +187,9 @@ define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { ; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1RV64-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 ; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; LMULMAX1RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; LMULMAX1RV64-NEXT: vse64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 1788d957229b3..42b074c135fef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -61,9 +61,9 @@ define void @add_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -160,9 +160,9 @@ define void @sub_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vsub.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -259,9 +259,9 @@ define void @mul_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -358,9 +358,9 @@ define void @and_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vand.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -457,9 +457,9 @@ define void @or_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -556,9 +556,9 @@ define void @xor_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vxor.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma 
; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -655,9 +655,9 @@ define void @lshr_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -754,9 +754,9 @@ define void @ashr_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vsra.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -853,9 +853,9 @@ define void @shl_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vsll.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -967,9 +967,9 @@ define void @sdiv_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vslideup.vi v8, v10, 4 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1081,9 +1081,9 @@ define void @srem_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vslideup.vi v8, v10, 4 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1195,9 +1195,9 @@ define void @udiv_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vslideup.vi v8, v10, 4 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1309,9 +1309,9 @@ define void @urem_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vslideup.vi v8, v10, 4 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1536,9 +1536,9 @@ define void @mulhu_v6i16(ptr %x) { ; RV64-NEXT: vslideup.vi v9, v8, 4 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v9, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v9, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1763,9 +1763,9 @@ define void @mulhs_v6i16(ptr %x) { ; RV64-NEXT: vslideup.vi v8, v9, 4 
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1933,9 +1933,9 @@ define void @smin_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vmin.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2035,9 +2035,9 @@ define void @smin_vx_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vmin.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2119,9 +2119,9 @@ define void @smin_xv_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vmin.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2205,9 +2205,9 @@ define void @smax_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vmax.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2307,9 +2307,9 @@ define void @smax_vx_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vmax.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2391,9 +2391,9 @@ define void @smax_xv_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vmax.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2477,9 +2477,9 @@ define void @umin_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vminu.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2579,9 +2579,9 @@ define void @umin_vx_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vminu.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, 
(a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2663,9 +2663,9 @@ define void @umin_xv_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vminu.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2749,9 +2749,9 @@ define void @umax_v6i16(ptr %x, ptr %y) { ; RV64-NEXT: vmaxu.vv v8, v8, v9 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2851,9 +2851,9 @@ define void @umax_vx_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vmaxu.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2935,9 +2935,9 @@ define void @umax_xv_v6i16(ptr %x, i16 %y) { ; RV64-NEXT: vmaxu.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <6 x i16>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll index 2e5f396694104..ec47b5f7b3dd6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -33,8 +33,8 @@ define <5 x i8> @load_v5i8_align1(ptr %p) { ; RV32-NEXT: or a1, a3, a1 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vslidedown.vi v9, v8, 3 ; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 3 ; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vslidedown.vi v9, v8, 2 ; RV32-NEXT: vmv.x.s a2, v9 @@ -65,8 +65,8 @@ define <5 x i8> @load_v5i8_align1(ptr %p) { ; RV64-NEXT: or a1, a3, a1 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vslidedown.vi v9, v8, 3 ; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 3 ; RV64-NEXT: vmv.x.s a1, v9 ; RV64-NEXT: vslidedown.vi v9, v8, 2 ; RV64-NEXT: vmv.x.s a2, v9 @@ -141,9 +141,9 @@ define <6 x half> @load_v6f16(ptr %p) { ; RV64-NEXT: vslide1down.vx v8, v8, a2 ; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: sd a2, 0(a0) +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %x = load <6 x half>, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 946e7d2aaa3dc..48a7f8bf6de8c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -330,6 +330,7 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV32ZVE32F: # 
%bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: srai a2, a1, 31 @@ -391,6 +392,7 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: andi a1, a1, 255 @@ -796,12 +798,13 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB12_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_8 @@ -1058,6 +1061,7 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: srai a2, a1, 31 @@ -1119,6 +1123,7 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: lui a2, 16 @@ -1537,14 +1542,15 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB23_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_8 @@ -1685,14 +1691,15 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB24_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetvli zero, 
zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_8 @@ -1838,15 +1845,16 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB25_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_8 @@ -1989,13 +1997,14 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB26_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_8 @@ -2149,12 +2158,12 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: srai a1, a1, 31 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9 ; RV32ZVE32F-NEXT: srai a2, a2, 31 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vse32.v v9, (a0) ; RV32ZVE32F-NEXT: addi a3, a0, 8 ; RV32ZVE32F-NEXT: vse32.v v8, (a3) @@ -2212,10 +2221,10 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: sw zero, 4(a0) -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vse32.v v9, (a0) ; RV32ZVE32F-NEXT: addi a0, a0, 8 ; RV32ZVE32F-NEXT: vse32.v v8, (a0) @@ -2624,12 +2633,13 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB35_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -2772,12 +2782,13 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB36_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -2925,13 +2936,14 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB37_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -3079,12 +3091,13 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB38_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -3228,12 +3241,13 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB39_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -3384,13 +3398,14 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB40_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -4454,6 +4469,7 @@ define <8 x i64> 
@mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz a7, .LBB48_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v9 ; RV64ZVE32F-NEXT: slli a7, a7, 3 @@ -4473,6 +4489,7 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB48_10 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v8 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 @@ -4480,6 +4497,7 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB48_11 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v9 ; RV64ZVE32F-NEXT: slli t1, t1, 3 @@ -4505,6 +4523,7 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a6, a6, -128 ; RV64ZVE32F-NEXT: beqz a6, .LBB48_17 ; RV64ZVE32F-NEXT: .LBB48_19: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -4729,6 +4748,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz a7, .LBB49_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v9 ; RV64ZVE32F-NEXT: slli a7, a7, 3 @@ -4748,6 +4768,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB49_10 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v8 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 @@ -4755,6 +4776,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB49_11 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v9 ; RV64ZVE32F-NEXT: slli t1, t1, 3 @@ -4780,6 +4802,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a6, a6, -128 ; RV64ZVE32F-NEXT: beqz a6, .LBB49_17 ; RV64ZVE32F-NEXT: .LBB49_19: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -5008,6 +5031,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz a7, .LBB50_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v9 ; RV64ZVE32F-NEXT: andi a7, a7, 255 @@ -5028,6 +5052,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: 
andi t0, a5, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB50_10 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v8 ; RV64ZVE32F-NEXT: andi t0, t0, 255 ; RV64ZVE32F-NEXT: slli t0, t0, 3 @@ -5036,6 +5061,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi t1, a5, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB50_11 ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v9 ; RV64ZVE32F-NEXT: andi t1, t1, 255 @@ -5063,6 +5089,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a5, a5, -128 ; RV64ZVE32F-NEXT: beqz a5, .LBB50_17 ; RV64ZVE32F-NEXT: .LBB50_19: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -5246,8 +5273,8 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a5, v0 -; RV64ZVE32F-NEXT: andi a3, a5, 1 +; RV64ZVE32F-NEXT: vmv.x.s a6, v0 +; RV64ZVE32F-NEXT: andi a3, a6, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB51_3 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -5255,14 +5282,14 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: ld a3, 0(a3) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB51_4 ; RV64ZVE32F-NEXT: .LBB51_2: ; RV64ZVE32F-NEXT: ld a4, 8(a2) ; RV64ZVE32F-NEXT: j .LBB51_5 ; RV64ZVE32F-NEXT: .LBB51_3: ; RV64ZVE32F-NEXT: ld a3, 0(a2) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: beqz a4, .LBB51_2 ; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -5273,49 +5300,52 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: ld a4, 0(a4) ; RV64ZVE32F-NEXT: .LBB51_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: andi a5, a6, 4 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: beqz a6, .LBB51_7 +; RV64ZVE32F-NEXT: beqz a5, .LBB51_7 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a6, v9 -; RV64ZVE32F-NEXT: slli a6, a6, 3 -; RV64ZVE32F-NEXT: add a6, a1, a6 -; RV64ZVE32F-NEXT: ld a6, 0(a6) +; RV64ZVE32F-NEXT: vmv.x.s a5, v9 +; RV64ZVE32F-NEXT: slli a5, a5, 3 +; RV64ZVE32F-NEXT: add a5, a1, a5 +; RV64ZVE32F-NEXT: ld a5, 0(a5) ; RV64ZVE32F-NEXT: j .LBB51_8 ; RV64ZVE32F-NEXT: .LBB51_7: -; RV64ZVE32F-NEXT: ld a6, 16(a2) +; RV64ZVE32F-NEXT: ld a5, 16(a2) ; RV64ZVE32F-NEXT: .LBB51_8: # %else5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; RV64ZVE32F-NEXT: andi a7, a5, 8 +; RV64ZVE32F-NEXT: andi a7, a6, 8 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz a7, .LBB51_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v9 ; RV64ZVE32F-NEXT: slli a7, a7, 3 ; RV64ZVE32F-NEXT: add a7, a1, a7 ; 
RV64ZVE32F-NEXT: ld a7, 0(a7) -; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: bnez t0, .LBB51_13 ; RV64ZVE32F-NEXT: .LBB51_10: ; RV64ZVE32F-NEXT: ld t0, 32(a2) -; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: bnez t1, .LBB51_14 ; RV64ZVE32F-NEXT: .LBB51_11: ; RV64ZVE32F-NEXT: ld t1, 40(a2) ; RV64ZVE32F-NEXT: j .LBB51_15 ; RV64ZVE32F-NEXT: .LBB51_12: ; RV64ZVE32F-NEXT: ld a7, 24(a2) -; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB51_10 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v8 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 ; RV64ZVE32F-NEXT: ld t0, 0(t0) -; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB51_11 ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v9 ; RV64ZVE32F-NEXT: slli t1, t1, 3 @@ -5323,7 +5353,7 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB51_15: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: andi t2, a5, 64 +; RV64ZVE32F-NEXT: andi t2, a6, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz t2, .LBB51_18 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16 @@ -5331,16 +5361,17 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli t2, t2, 3 ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: ld t2, 0(t2) -; RV64ZVE32F-NEXT: andi a5, a5, -128 -; RV64ZVE32F-NEXT: bnez a5, .LBB51_19 +; RV64ZVE32F-NEXT: andi a6, a6, -128 +; RV64ZVE32F-NEXT: bnez a6, .LBB51_19 ; RV64ZVE32F-NEXT: .LBB51_17: ; RV64ZVE32F-NEXT: ld a1, 56(a2) ; RV64ZVE32F-NEXT: j .LBB51_20 ; RV64ZVE32F-NEXT: .LBB51_18: ; RV64ZVE32F-NEXT: ld t2, 48(a2) -; RV64ZVE32F-NEXT: andi a5, a5, -128 -; RV64ZVE32F-NEXT: beqz a5, .LBB51_17 +; RV64ZVE32F-NEXT: andi a6, a6, -128 +; RV64ZVE32F-NEXT: beqz a6, .LBB51_17 ; RV64ZVE32F-NEXT: .LBB51_19: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -5349,7 +5380,7 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB51_20: # %else20 ; RV64ZVE32F-NEXT: sd a3, 0(a0) ; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a6, 16(a0) +; RV64ZVE32F-NEXT: sd a5, 16(a0) ; RV64ZVE32F-NEXT: sd a7, 24(a0) ; RV64ZVE32F-NEXT: sd t0, 32(a0) ; RV64ZVE32F-NEXT: sd t1, 40(a0) @@ -5522,8 +5553,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a5, v0 -; RV64ZVE32F-NEXT: andi a3, a5, 1 +; RV64ZVE32F-NEXT: vmv.x.s a6, v0 +; RV64ZVE32F-NEXT: andi a3, a6, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB52_3 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma @@ -5531,14 +5562,14 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: ld a3, 0(a3) -; RV64ZVE32F-NEXT: andi 
a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB52_4 ; RV64ZVE32F-NEXT: .LBB52_2: ; RV64ZVE32F-NEXT: ld a4, 8(a2) ; RV64ZVE32F-NEXT: j .LBB52_5 ; RV64ZVE32F-NEXT: .LBB52_3: ; RV64ZVE32F-NEXT: ld a3, 0(a2) -; RV64ZVE32F-NEXT: andi a4, a5, 2 +; RV64ZVE32F-NEXT: andi a4, a6, 2 ; RV64ZVE32F-NEXT: beqz a4, .LBB52_2 ; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -5549,49 +5580,52 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: ld a4, 0(a4) ; RV64ZVE32F-NEXT: .LBB52_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: andi a5, a6, 4 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: beqz a6, .LBB52_7 +; RV64ZVE32F-NEXT: beqz a5, .LBB52_7 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a6, v9 -; RV64ZVE32F-NEXT: slli a6, a6, 3 -; RV64ZVE32F-NEXT: add a6, a1, a6 -; RV64ZVE32F-NEXT: ld a6, 0(a6) +; RV64ZVE32F-NEXT: vmv.x.s a5, v9 +; RV64ZVE32F-NEXT: slli a5, a5, 3 +; RV64ZVE32F-NEXT: add a5, a1, a5 +; RV64ZVE32F-NEXT: ld a5, 0(a5) ; RV64ZVE32F-NEXT: j .LBB52_8 ; RV64ZVE32F-NEXT: .LBB52_7: -; RV64ZVE32F-NEXT: ld a6, 16(a2) +; RV64ZVE32F-NEXT: ld a5, 16(a2) ; RV64ZVE32F-NEXT: .LBB52_8: # %else5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; RV64ZVE32F-NEXT: andi a7, a5, 8 +; RV64ZVE32F-NEXT: andi a7, a6, 8 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz a7, .LBB52_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v9 ; RV64ZVE32F-NEXT: slli a7, a7, 3 ; RV64ZVE32F-NEXT: add a7, a1, a7 ; RV64ZVE32F-NEXT: ld a7, 0(a7) -; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: bnez t0, .LBB52_13 ; RV64ZVE32F-NEXT: .LBB52_10: ; RV64ZVE32F-NEXT: ld t0, 32(a2) -; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: bnez t1, .LBB52_14 ; RV64ZVE32F-NEXT: .LBB52_11: ; RV64ZVE32F-NEXT: ld t1, 40(a2) ; RV64ZVE32F-NEXT: j .LBB52_15 ; RV64ZVE32F-NEXT: .LBB52_12: ; RV64ZVE32F-NEXT: ld a7, 24(a2) -; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: andi t0, a6, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB52_10 ; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v8 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 ; RV64ZVE32F-NEXT: ld t0, 0(t0) -; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: andi t1, a6, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB52_11 ; RV64ZVE32F-NEXT: .LBB52_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v9 ; RV64ZVE32F-NEXT: slli t1, t1, 3 @@ -5599,7 +5633,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB52_15: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: andi t2, a5, 64 +; RV64ZVE32F-NEXT: andi t2, a6, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz t2, .LBB52_18 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16 @@ -5607,16 +5641,17 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli t2, t2, 3 ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: ld t2, 0(t2) 
-; RV64ZVE32F-NEXT: andi a5, a5, -128 -; RV64ZVE32F-NEXT: bnez a5, .LBB52_19 +; RV64ZVE32F-NEXT: andi a6, a6, -128 +; RV64ZVE32F-NEXT: bnez a6, .LBB52_19 ; RV64ZVE32F-NEXT: .LBB52_17: ; RV64ZVE32F-NEXT: ld a1, 56(a2) ; RV64ZVE32F-NEXT: j .LBB52_20 ; RV64ZVE32F-NEXT: .LBB52_18: ; RV64ZVE32F-NEXT: ld t2, 48(a2) -; RV64ZVE32F-NEXT: andi a5, a5, -128 -; RV64ZVE32F-NEXT: beqz a5, .LBB52_17 +; RV64ZVE32F-NEXT: andi a6, a6, -128 +; RV64ZVE32F-NEXT: beqz a6, .LBB52_17 ; RV64ZVE32F-NEXT: .LBB52_19: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -5625,7 +5660,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB52_20: # %else20 ; RV64ZVE32F-NEXT: sd a3, 0(a0) ; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a6, 16(a0) +; RV64ZVE32F-NEXT: sd a5, 16(a0) ; RV64ZVE32F-NEXT: sd a7, 24(a0) ; RV64ZVE32F-NEXT: sd t0, 32(a0) ; RV64ZVE32F-NEXT: sd t1, 40(a0) @@ -5848,6 +5883,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 ; RV64ZVE32F-NEXT: beqz t0, .LBB53_12 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9 ; RV64ZVE32F-NEXT: and t0, t0, a5 @@ -5868,6 +5904,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi t1, a6, 16 ; RV64ZVE32F-NEXT: beqz t1, .LBB53_10 ; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t1, v8 ; RV64ZVE32F-NEXT: and t1, t1, a5 ; RV64ZVE32F-NEXT: slli t1, t1, 3 @@ -5876,6 +5913,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi t2, a6, 32 ; RV64ZVE32F-NEXT: beqz t2, .LBB53_11 ; RV64ZVE32F-NEXT: .LBB53_14: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s t2, v9 ; RV64ZVE32F-NEXT: and t2, t2, a5 @@ -5903,6 +5941,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a6, a6, -128 ; RV64ZVE32F-NEXT: beqz a6, .LBB53_17 ; RV64ZVE32F-NEXT: .LBB53_19: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: and a2, a2, a5 @@ -6139,6 +6178,7 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a7, a5, 8 ; RV64ZVE32F-NEXT: beqz a7, .LBB54_7 ; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8 ; RV64ZVE32F-NEXT: slli a7, a7, 3 @@ -6147,6 +6187,7 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi t0, a5, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB54_8 ; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v10 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 @@ -6154,12 +6195,14 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi t1, a5, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB54_9 ; RV64ZVE32F-NEXT: 
.LBB54_13: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8 ; RV64ZVE32F-NEXT: slli t1, t1, 3 ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB54_14: # %else14 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: beqz t2, .LBB54_17 @@ -6178,6 +6221,7 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a5, a5, -128 ; RV64ZVE32F-NEXT: beqz a5, .LBB54_16 ; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -6412,6 +6456,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi a7, a5, 8 ; RV64ZVE32F-NEXT: beqz a7, .LBB55_7 ; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8 ; RV64ZVE32F-NEXT: slli a7, a7, 3 @@ -6420,6 +6465,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi t0, a5, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB55_8 ; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v10 ; RV64ZVE32F-NEXT: slli t0, t0, 3 ; RV64ZVE32F-NEXT: add t0, a1, t0 @@ -6427,12 +6473,14 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi t1, a5, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB55_9 ; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8 ; RV64ZVE32F-NEXT: slli t1, t1, 3 ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB55_14: # %else14 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: beqz t2, .LBB55_17 @@ -6451,6 +6499,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi a5, a5, -128 ; RV64ZVE32F-NEXT: beqz a5, .LBB55_16 ; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -6689,6 +6738,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi a7, a5, 8 ; RV64ZVE32F-NEXT: beqz a7, .LBB56_7 ; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8 ; RV64ZVE32F-NEXT: slli a7, a7, 32 @@ -6698,6 +6748,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi t0, a5, 16 ; RV64ZVE32F-NEXT: beqz t0, .LBB56_8 ; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s t0, v10 ; RV64ZVE32F-NEXT: slli t0, t0, 32 ; RV64ZVE32F-NEXT: srli t0, t0, 29 @@ -6706,6 +6757,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: 
andi t1, a5, 32 ; RV64ZVE32F-NEXT: beqz t1, .LBB56_9 ; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8 ; RV64ZVE32F-NEXT: slli t1, t1, 32 @@ -6713,6 +6765,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB56_14: # %else14 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: beqz t2, .LBB56_17 @@ -6732,6 +6785,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: andi a5, a5, -128 ; RV64ZVE32F-NEXT: beqz a5, .LBB56_16 ; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 @@ -7493,14 +7547,15 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB64_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_8 @@ -7641,14 +7696,15 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB65_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_8 @@ -7794,15 +7850,16 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB66_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_8 @@ -7945,13 +8002,14 @@ define <8 x half> 
@mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_8 @@ -8454,12 +8512,13 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8602,12 +8661,13 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8755,13 +8815,14 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8909,12 +8970,13 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -9058,12 +9120,13 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: 
vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -9214,13 +9277,14 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB79_13: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -10116,6 +10180,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_10 ; RV64ZVE32F-NEXT: .LBB87_9: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10135,6 +10200,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_14 ; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -10151,6 +10217,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10159,6 +10226,7 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_8 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 @@ -10330,6 +10398,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_10 ; RV64ZVE32F-NEXT: .LBB88_9: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10349,6 +10418,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_14 ; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; 
RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -10365,6 +10435,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10373,6 +10444,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_8 ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 @@ -10548,6 +10620,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_10 ; RV64ZVE32F-NEXT: .LBB89_9: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: andi a3, a3, 255 @@ -10569,6 +10642,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_14 ; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -10586,6 +10660,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: andi a3, a3, 255 @@ -10595,6 +10670,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_8 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: andi a3, a3, 255 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10769,6 +10845,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_10 ; RV64ZVE32F-NEXT: .LBB90_9: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10788,6 +10865,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_14 ; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -10804,6 +10882,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -10812,6 +10891,7 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> 
%idxs, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_8 ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 @@ -10984,6 +11064,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_10 ; RV64ZVE32F-NEXT: .LBB91_9: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -11003,6 +11084,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_14 ; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -11019,6 +11101,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -11027,6 +11110,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_8 ; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 @@ -11205,6 +11289,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: andi a4, a3, 32 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_10 ; RV64ZVE32F-NEXT: .LBB92_9: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9 ; RV64ZVE32F-NEXT: and a4, a4, a2 @@ -11226,6 +11311,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: andi a3, a3, -128 ; RV64ZVE32F-NEXT: beqz a3, .LBB92_14 ; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a2, a3, a2 @@ -11243,6 +11329,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9 ; RV64ZVE32F-NEXT: and a4, a4, a2 @@ -11252,6 +11339,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: andi a4, a3, 16 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_8 ; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a4, v8 ; RV64ZVE32F-NEXT: and a4, a4, a2 ; RV64ZVE32F-NEXT: slli a4, a4, 3 @@ -11419,12 +11507,14 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_9 ; RV64ZVE32F-NEXT: .LBB93_8: # 
%cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB93_9: # %else14 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_11 @@ -11437,6 +11527,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB93_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -11460,6 +11551,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_6 ; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -11468,6 +11560,7 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_7 ; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 @@ -11633,12 +11726,14 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_9 ; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB94_9: # %else14 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_11 @@ -11651,6 +11746,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB94_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -11674,6 +11770,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_6 ; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -11682,6 +11779,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_7 ; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: slli a3, a3, 3 ; RV64ZVE32F-NEXT: add a3, a1, a3 @@ -11850,6 +11948,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: 
andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_9 ; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 32 @@ -11857,6 +11956,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB95_9: # %else14 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_11 @@ -11870,6 +11970,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB95_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 @@ -11895,6 +11996,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_6 ; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: slli a3, a3, 32 @@ -11904,6 +12006,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_7 ; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 ; RV64ZVE32F-NEXT: slli a3, a3, 32 ; RV64ZVE32F-NEXT: srli a3, a3, 29 @@ -12309,6 +12412,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12363,6 +12467,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: bnez a2, .LBB97_14 ; RV64ZVE32F-NEXT: j .LBB97_15 ; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12535,6 +12640,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_19 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12729,6 +12835,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12828,6 +12935,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: bltz a2, .LBB98_27 ; RV64ZVE32F-NEXT: j .LBB98_28 ; RV64ZVE32F-NEXT: .LBB98_58: # 
%cond.load55 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12890,6 +12998,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: bltz a2, .LBB98_38 ; RV64ZVE32F-NEXT: j .LBB98_39 ; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 1da6953cf28af..5b08fe87bbde0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -528,8 +528,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB9_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -539,8 +539,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB9_6: # %else4 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -558,8 +558,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB9_10: # %else10 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -572,11 +572,12 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB9_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_8 @@ -593,8 +594,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB9_12 @@ -603,8 +604,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 
1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vse8.v v8, (a0) ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs @@ -1123,6 +1124,7 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB18_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -1253,6 +1255,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB19_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -1388,6 +1391,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB20_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -1478,8 +1482,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB21_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -1490,8 +1494,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB21_6: # %else4 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -1510,8 +1514,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB21_10: # %else10 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -1524,12 +1528,13 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB21_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz 
a2, .LBB21_8 @@ -1548,8 +1553,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB21_12 @@ -1559,8 +1564,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vse16.v v8, (a0) ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs @@ -2022,6 +2027,7 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB29_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2151,6 +2157,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB30_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2285,6 +2292,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB31_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -2420,6 +2428,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB32_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2550,6 +2559,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB33_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2687,6 +2697,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB34_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -2775,8 +2786,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi 
v12, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma @@ -2800,8 +2811,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB35_9: # %else10 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -2817,8 +2828,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v14, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 @@ -2828,8 +2839,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 @@ -2848,8 +2859,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11 @@ -2859,8 +2870,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vse32.v v8, (a0) ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs @@ -3634,6 +3645,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_10 ; RV64ZVE32F-NEXT: .LBB42_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -3650,6 +3662,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB42_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -3658,6 +3671,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_8 ; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, 
a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 @@ -3673,6 +3687,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_12 ; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -3877,6 +3892,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_10 ; RV64ZVE32F-NEXT: .LBB43_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -3893,6 +3909,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB43_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -3901,6 +3918,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_8 ; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 @@ -3916,6 +3934,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_12 ; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4124,6 +4143,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_10 ; RV64ZVE32F-NEXT: .LBB44_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: andi a0, a0, 255 @@ -4141,6 +4161,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB44_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: andi a0, a0, 255 @@ -4150,6 +4171,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_8 ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: andi a0, a0, 255 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4167,6 +4189,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_12 ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: andi a0, a0, 255 @@ -4374,6 +4397,7 @@ define void 
@mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_10 ; RV64ZVE32F-NEXT: .LBB45_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4390,6 +4414,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB45_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4398,6 +4423,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_8 ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 @@ -4413,6 +4439,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_12 ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4618,6 +4645,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_10 ; RV64ZVE32F-NEXT: .LBB46_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4634,6 +4662,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB46_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4642,6 +4671,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_8 ; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 @@ -4657,6 +4687,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_12 ; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4868,6 +4899,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_10 ; RV64ZVE32F-NEXT: .LBB47_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: and a0, a0, a4 @@ -4885,6 +4917,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB47_12: # %else14 
; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 ; RV64ZVE32F-NEXT: and a0, a0, a4 @@ -4894,6 +4927,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_8 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: and a0, a0, a4 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -4911,6 +4945,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_12 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: and a0, a0, a4 @@ -5111,12 +5146,14 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_9 ; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a5, 0(a0) ; RV64ZVE32F-NEXT: .LBB48_9: # %else10 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15 @@ -5133,6 +5170,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_6 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -5141,6 +5179,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_7 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 @@ -5156,6 +5195,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_11 ; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -5354,12 +5394,14 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_9 ; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a5, 0(a0) ; RV64ZVE32F-NEXT: .LBB49_9: # %else10 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15 @@ -5376,6 +5418,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x 
i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_6 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -5384,6 +5427,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_7 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 @@ -5399,6 +5443,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_11 ; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 @@ -5600,6 +5645,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_9 ; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 32 @@ -5607,6 +5653,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a5, 0(a0) ; RV64ZVE32F-NEXT: .LBB50_9: # %else10 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15 @@ -5624,6 +5671,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_6 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 32 @@ -5633,6 +5681,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_7 ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 ; RV64ZVE32F-NEXT: slli a0, a0, 32 ; RV64ZVE32F-NEXT: srli a0, a0, 29 @@ -5650,6 +5699,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: andi a0, a4, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_11 ; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 32 @@ -6350,6 +6400,7 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: .LBB58_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -6480,6 +6531,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB59_12: # %else14 ; RV64ZVE32F-NEXT: ret ; 
RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -6615,6 +6667,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB60_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -6705,8 +6758,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB61_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -6717,8 +6770,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB61_6: # %else4 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma @@ -6737,8 +6790,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB61_10: # %else10 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -6751,12 +6804,13 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB61_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_8 @@ -6775,8 +6829,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV64ZVE32F-NEXT: vse16.v v10, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_12 @@ -6786,8 +6840,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vse16.v v8, (a0) ; 
RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs @@ -7195,6 +7249,7 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: .LBB68_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7324,6 +7379,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB69_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7458,6 +7514,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB70_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -7593,6 +7650,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: .LBB71_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7723,6 +7781,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB72_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -7860,6 +7919,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB73_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -7948,8 +8008,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma @@ -7973,8 +8033,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB74_9: # %else10 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -7990,8 +8050,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: 
vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v14, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 @@ -8001,8 +8061,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 @@ -8021,8 +8081,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11 @@ -8032,8 +8092,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 ; RV64ZVE32F-NEXT: vse32.v v8, (a0) ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs @@ -8668,6 +8728,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_10 ; RV64ZVE32F-NEXT: .LBB81_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -8684,6 +8745,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: .LBB81_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -8692,6 +8754,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_8 ; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8707,6 +8770,7 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB81_12 ; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -8867,6 +8931,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_10 ; RV64ZVE32F-NEXT: .LBB82_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -8883,6 +8948,7 @@ 
define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB82_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -8891,6 +8957,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_8 ; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -8906,6 +8973,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB82_12 ; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -9070,6 +9138,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_10 ; RV64ZVE32F-NEXT: .LBB83_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -9087,6 +9156,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB83_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -9096,6 +9166,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_8 ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -9113,6 +9184,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB83_12 ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: andi a1, a1, 255 @@ -9276,6 +9348,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_10 ; RV64ZVE32F-NEXT: .LBB84_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -9292,6 +9365,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: .LBB84_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -9300,6 +9374,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: andi a2, a1, 16 
; RV64ZVE32F-NEXT: beqz a2, .LBB84_8 ; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9315,6 +9390,7 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB84_12 ; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -9476,6 +9552,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_10 ; RV64ZVE32F-NEXT: .LBB85_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -9492,6 +9569,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB85_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -9500,6 +9578,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_8 ; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9515,6 +9594,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB85_12 ; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -9682,6 +9762,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_10 ; RV64ZVE32F-NEXT: .LBB86_9: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -9699,6 +9780,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB86_12: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -9708,6 +9790,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_8 ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 3 @@ -9725,6 +9808,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a2, -128 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_12 ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, 
e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: and a1, a2, a1 @@ -9881,12 +9965,14 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9 ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB87_9: # %else10 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 @@ -9903,6 +9989,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6 ; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -9911,6 +9998,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7 ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9926,6 +10014,7 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB87_11 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -10080,12 +10169,14 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9 ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB88_9: # %else10 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 @@ -10102,6 +10193,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6 ; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 3 @@ -10110,6 +10202,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7 ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -10125,6 +10218,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB88_11 ; 
RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 3 @@ -10282,6 +10376,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_9 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 @@ -10289,6 +10384,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB89_9: # %else10 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_15 @@ -10306,6 +10402,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_6 ; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 32 @@ -10315,6 +10412,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_7 ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 32 ; RV64ZVE32F-NEXT: srli a2, a2, 29 @@ -10332,6 +10430,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB89_11 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: slli a1, a1, 32 @@ -10593,8 +10692,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -10604,8 +10703,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB91_6: # %else4 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -10623,8 +10722,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB91_10: # %else10 ; 
RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma @@ -10647,8 +10746,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB91_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -10658,8 +10757,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB91_17: # %else20 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -10677,8 +10776,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB91_21: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -10688,8 +10787,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store27 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB91_23: # %else28 ; RV64ZVE32F-NEXT: lui a2, 1048568 @@ -10700,17 +10799,18 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15 ; RV64ZVE32F-NEXT: vse8.v v8, (a0) ; RV64ZVE32F-NEXT: .LBB91_25: # %else30 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_8 @@ -10727,8 +10827,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store11 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 128 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_12 @@ -10737,8 +10837,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: 
vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: andi a2, a1, 256 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13 @@ -10753,11 +10853,12 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: bnez a2, .LBB91_14 ; RV64ZVE32F-NEXT: j .LBB91_15 ; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11 ; RV64ZVE32F-NEXT: vse8.v v10, (a2) ; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_19 @@ -10825,8 +10926,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -10836,8 +10937,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB92_6: # %else4 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -10855,8 +10956,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB92_10: # %else10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma @@ -10879,8 +10980,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 9 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB92_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -10890,8 +10991,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB92_17: # %else20 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -10899,11 +11000,12 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4 ; RV64ZVE32F-NEXT: 
bgez a2, .LBB92_19
 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11
 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_19: # %else22
 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
@@ -10924,8 +11026,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 13
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 13
 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_23: # %else26
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -10946,8 +11048,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_28: # %else34
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -10957,8 +11059,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: # %bb.29: # %cond.store35
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_30: # %else36
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
@@ -10976,8 +11078,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_34: # %else42
 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
@@ -11000,8 +11102,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_39: # %else50
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -11011,8 +11113,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: # %bb.40: # %cond.store51
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_41: # %else52
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
@@ -11030,8 +11132,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_45: # %else58
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -11041,8 +11143,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: # %bb.46: # %cond.store59
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB92_47: # %else60
 ; RV64ZVE32F-NEXT: lui a2, 524288
@@ -11053,17 +11155,18 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
 ; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
 ; RV64ZVE32F-NEXT: .LBB92_49: # %else62
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 3
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 3
 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
@@ -11080,8 +11183,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store11
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 128
 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
@@ -11090,8 +11193,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 7
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 7
 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 256
 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
@@ -11108,8 +11211,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store27
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: slli a2, a1, 48
 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
@@ -11118,8 +11221,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: slli a2, a1, 47
 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
@@ -11134,11 +11237,12 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
 ; RV64ZVE32F-NEXT: j .LBB92_28
 ; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: slli a2, a1, 43
 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
@@ -11155,8 +11259,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store43
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: slli a2, a1, 40
 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
@@ -11165,8 +11269,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: slli a2, a1, 39
 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
@@ -11181,11 +11285,12 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
 ; RV64ZVE32F-NEXT: j .LBB92_39
 ; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
 ; RV64ZVE32F-NEXT: slli a2, a1, 35
 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_43
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 0067b8df6a0f7..9095f95b37ec8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -43,9 +43,9 @@ define void @store_v6i8(ptr %p, <6 x i8> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v8, 2
 ; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 store <6 x i8> %v, ptr %p
@@ -67,9 +67,9 @@ define void @store_v12i8(ptr %p, <12 x i8> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vslidedown.vi v8, v8, 2
 ; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vse32.v v8, (a0)
 ; RV64-NEXT: ret
 store <12 x i8> %v, ptr %p
@@ -91,9 +91,9 @@ define void @store_v6i16(ptr %p, <6 x i16> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vslidedown.vi v8, v8, 2
 ; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vse32.v v8, (a0)
 ; RV64-NEXT: ret
 store <6 x i16> %v, ptr %p
@@ -155,9 +155,9 @@ define void @store_v6f16(ptr %p, <6 x half> %v) {
 ; RV64-NEXT: vslide1down.vx v8, v8, a1
 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vslidedown.vi v8, v8, 2
 ; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64-NEXT: vse32.v v8, (a0)
 ; RV64-NEXT: ret
 store <6 x half> %v, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
index db2361dd3e586..d873d9ad7c85c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
@@ -20,6 +20,7 @@ define i32 @test(i32 %call.i) {
 ; CHECK-V-NEXT: vmslt.vx v0, v8, a0
 ; CHECK-V-NEXT: vmv.v.i v8, 0
 ; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
 ; CHECK-V-NEXT: vmv.x.s a0, v8
 ; CHECK-V-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index 9e8463a085302..4a6f52ac940b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -22,7 +22,9 @@ define <4 x i8> @v2i8_2(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
@@ -170,7 +172,9 @@ define <4 x i16> @v2i16_2(<2 x i16> %a, <2 x i16> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
@@ -317,7 +321,9 @@ define <4 x i32> @v2i32_2(<2 x i32> %a, < 2 x i32> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
@@ -465,7 +471,9 @@ define <4 x i64> @v2i64_2(<2 x i64> %a, < 2 x i64> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
@@ -552,7 +560,9 @@ define <4 x half> @v2f16_2(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
@@ -673,7 +683,9 @@ define <4 x float> @v2f32_2(<2 x float> %a, <2 x float> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
@@ -764,7 +776,9 @@ define <4 x double> @v2f64_2(<2 x double> %a, < 2 x double> %b) {
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v9, 1
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v10, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
index 67431f72c9cc1..6faa4f6b45da7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -15,7 +15,9 @@ define @splice_nxv1i1_offset_negone( %a, @splice_nxv2i1_offset_negone( %a, @splice_nxv4i1_offset_negone( %a, @splice_nxv8i1_offset_negone( %a, @splat_idx_nxv4i32( %v, i64 %idx) {
 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v8, v8, a0
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index dceae00d2e517..2fa8474858c09 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -564,6 +564,7 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT: vand.vx v8, v8, a1
 ; RV32MV-NEXT: vmsltu.vv v0, v12, v8
 ; RV32MV-NEXT: vmerge.vim v8, v10, -1, v0
+; RV32MV-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV32MV-NEXT: vslidedown.vi v9, v8, 2
 ; RV32MV-NEXT: vmv.x.s a1, v9
 ; RV32MV-NEXT: slli a2, a1, 21
@@ -627,6 +628,7 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV64MV-NEXT: vmerge.vim v8, v10, -1, v0
 ; RV64MV-NEXT: vmv.x.s a1, v8
 ; RV64MV-NEXT: andi a1, a1, 2047
+; RV64MV-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64MV-NEXT: vslidedown.vi v9, v8, 1
 ; RV64MV-NEXT: vmv.x.s a2, v9
 ; RV64MV-NEXT: andi a2, a2, 2047