diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 5aab138dae408..d9f8222669cab 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -96,6 +96,11 @@ static cl::opt<bool> EnableMISchedLoadClustering(
     cl::desc("Enable load clustering in the machine scheduler"),
     cl::init(false));
 
+static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
+    "riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
+    cl::desc("Insert vsetvls after vector register allocation"),
+    cl::init(true));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -389,6 +394,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
 
 bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   addPass(createRVVRegAllocPass(false));
+  if (EnableVSETVLIAfterRVVRegAlloc)
+    addPass(createRISCVInsertVSETVLIPass());
   addPass(createRISCVCoalesceVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
@@ -399,6 +406,8 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
 bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
   addPass(createRVVRegAllocPass(true));
   addPass(createVirtRegRewriter(false));
+  if (EnableVSETVLIAfterRVVRegAlloc)
+    addPass(createRISCVInsertVSETVLIPass());
   addPass(createRISCVCoalesceVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
@@ -547,10 +556,12 @@ void RISCVPassConfig::addPreRegAlloc() {
 
   // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
   // register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
-  if (TM->getOptLevel() == CodeGenOptLevel::None)
-    insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
-  else
-    insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
+  if (!EnableVSETVLIAfterRVVRegAlloc) {
+    if (TM->getOptLevel() == CodeGenOptLevel::None)
+      insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
+    else
+      insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
+  }
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 3aaa5dc03a7dc..e4abc93d1a8a1 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -44,14 +44,12 @@
 ; CHECK-NEXT: RISC-V Insert Write VXRM Pass
 ; CHECK-NEXT: Init Undef Pass
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Slot index numbering
-; CHECK-NEXT: Live Interval Analysis
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Two-Address instruction pass
 ; CHECK-NEXT: Fast Register Allocator
+; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Slot index numbering
 ; CHECK-NEXT: Live Interval Analysis
+; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 52634b2a81629..0528b00d408b2 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -128,7 +128,6 @@
 ; CHECK-NEXT: Slot index numbering
 ; CHECK-NEXT: Live Interval Analysis
 ; CHECK-NEXT: Register Coalescer
-; CHECK-NEXT: RISC-V Insert VSETVLI pass
 ; CHECK-NEXT: Rename Disconnected Subregister
Components ; CHECK-NEXT: Machine Instruction Scheduler ; CHECK-NEXT: Machine Block Frequency Analysis @@ -142,6 +141,7 @@ ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Greedy Register Allocator ; CHECK-NEXT: Virtual Register Rewriter +; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: RISC-V Coalesce VSETVLI pass ; CHECK-NEXT: RISC-V Dead register definitions ; CHECK-NEXT: Virtual Register Map diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 83a4f63add337..eb6ac985287a1 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -24,36 +24,36 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48) -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46) -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v14, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40) +; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44) @@ -71,12 +71,12 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, 1048572 ; CHECK-NEXT: addi a0, a0, 928 ; CHECK-NEXT: vmsbc.vx v0, v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu ; CHECK-NEXT: vsext.vf2 v10, v8, v0.t ; CHECK-NEXT: lui a0, %hi(var_47) ; CHECK-NEXT: addi a0, a0, %lo(var_47) diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index ea8feef332984..6009a6c7e138a 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV32-NEXT: 
sw a0, 16(sp) ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero -; RV32-NEXT: vid.v v8 +; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vid.v v16 ; RV32-NEXT: li a2, -1 -; RV32-NEXT: vmadd.vx v8, a2, v16 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmadd.vx v16, a2, v8 ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vmsne.vi v0, v16, 0 +; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmsne.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vim v16, v16, -1, v0 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vand.vv v8, v16, v8 ; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a2, v8 ; RV32-NEXT: sltu a3, a0, a2 @@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV64: # %bb.0: ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vid.v v16 +; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vid.v v24 ; RV64-NEXT: li a1, -1 -; RV64-NEXT: vmadd.vx v16, a1, v24 +; RV64-NEXT: vmadd.vx v24, a1, v16 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vmsne.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: sub a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll index 15daf2c577906..7084c04805be7 100644 --- a/llvm/test/CodeGen/RISCV/pr69586.ll +++ b/llvm/test/CodeGen/RISCV/pr69586.ll @@ -927,258 +927,258 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 -; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: li a2, 11 ; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 -; REMAT-NEXT: vle32.v v16, (a2) +; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 -; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: li a2, 23 ; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 -; REMAT-NEXT: vle32.v v26, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v12, v16 -; REMAT-NEXT: vle32.v v12, (a2) -; REMAT-NEXT: lui a2, 3 -; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v14, (a2) -; REMAT-NEXT: li a2, 25 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: lui a2, 3 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v16, (a2) -; REMAT-NEXT: li a2, 13 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: li a2, 25 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v18, (a2) -; REMAT-NEXT: li a2, 27 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: li a2, 13 +; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v20, (a2) -; REMAT-NEXT: li a2, 7 -; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: li a2, 27 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v22, (a2) -; REMAT-NEXT: li a2, 29 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: li a2, 7 +; REMAT-NEXT: slli a2, a2, 11 ; 
REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v26 -; REMAT-NEXT: li a2, 15 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: li a2, 29 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v28 -; REMAT-NEXT: li a2, 31 -; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: li a2, 15 +; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v30 -; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: li a2, 31 +; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v6 ; REMAT-NEXT: lui a2, 4 -; REMAT-NEXT: addiw a2, a2, 512 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v4 -; REMAT-NEXT: li a2, 17 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: addiw a2, a2, 512 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v2 -; REMAT-NEXT: lui a2, 4 -; REMAT-NEXT: addiw a2, a2, 1536 +; REMAT-NEXT: li a2, 17 +; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24 -; REMAT-NEXT: li a2, 9 -; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: addiw a2, a2, 1536 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26 -; REMAT-NEXT: lui a2, 5 -; REMAT-NEXT: addiw a2, a2, -1536 +; REMAT-NEXT: li a2, 9 +; REMAT-NEXT: slli a2, a2, 11 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v28 -; REMAT-NEXT: li a2, 19 -; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: lui a2, 5 +; REMAT-NEXT: addiw a2, a2, -1536 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v30 -; REMAT-NEXT: lui ra, 5 -; REMAT-NEXT: addiw ra, ra, -512 -; REMAT-NEXT: add a2, a0, ra +; REMAT-NEXT: li a2, 19 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v6 -; REMAT-NEXT: lui s11, 5 -; REMAT-NEXT: add a2, a0, s11 +; REMAT-NEXT: lui ra, 5 +; REMAT-NEXT: addiw ra, ra, -512 +; REMAT-NEXT: add a2, a0, ra ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4 -; REMAT-NEXT: lui s10, 5 -; REMAT-NEXT: addiw s10, s10, 512 -; REMAT-NEXT: add a2, a0, s10 +; REMAT-NEXT: lui s11, 5 +; REMAT-NEXT: add a2, a0, s11 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v2 -; REMAT-NEXT: li s9, 21 -; REMAT-NEXT: slli s9, s9, 10 -; REMAT-NEXT: add a2, a0, s9 +; REMAT-NEXT: lui s10, 5 +; REMAT-NEXT: addiw s10, s10, 512 +; REMAT-NEXT: add a2, a0, s10 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v24 -; REMAT-NEXT: lui s8, 5 -; REMAT-NEXT: addiw s8, s8, 1536 -; REMAT-NEXT: add a2, a0, s8 +; REMAT-NEXT: li s9, 21 +; REMAT-NEXT: slli s9, s9, 10 +; REMAT-NEXT: add a2, a0, s9 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, 
v26 -; REMAT-NEXT: li s7, 11 -; REMAT-NEXT: slli s7, s7, 11 -; REMAT-NEXT: add a2, a0, s7 +; REMAT-NEXT: lui s8, 5 +; REMAT-NEXT: addiw s8, s8, 1536 +; REMAT-NEXT: add a2, a0, s8 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v28 -; REMAT-NEXT: lui s6, 6 -; REMAT-NEXT: addiw s6, s6, -1536 -; REMAT-NEXT: add a2, a0, s6 +; REMAT-NEXT: li s7, 11 +; REMAT-NEXT: slli s7, s7, 11 +; REMAT-NEXT: add a2, a0, s7 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v30 -; REMAT-NEXT: li s5, 23 -; REMAT-NEXT: slli s5, s5, 10 -; REMAT-NEXT: add a2, a0, s5 +; REMAT-NEXT: lui s6, 6 +; REMAT-NEXT: addiw s6, s6, -1536 +; REMAT-NEXT: add a2, a0, s6 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v6 -; REMAT-NEXT: lui s4, 6 -; REMAT-NEXT: addiw s4, s4, -512 -; REMAT-NEXT: add a2, a0, s4 +; REMAT-NEXT: li s5, 23 +; REMAT-NEXT: slli s5, s5, 10 +; REMAT-NEXT: add a2, a0, s5 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v4 -; REMAT-NEXT: lui s3, 6 -; REMAT-NEXT: add a2, a0, s3 +; REMAT-NEXT: lui s4, 6 +; REMAT-NEXT: addiw s4, s4, -512 +; REMAT-NEXT: add a2, a0, s4 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2 -; REMAT-NEXT: lui s2, 6 -; REMAT-NEXT: addiw s2, s2, 512 -; REMAT-NEXT: add a2, a0, s2 +; REMAT-NEXT: lui s3, 6 +; REMAT-NEXT: add a2, a0, s3 ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v18, v24 -; REMAT-NEXT: li s1, 25 -; REMAT-NEXT: slli s1, s1, 10 -; REMAT-NEXT: add a2, a0, s1 +; REMAT-NEXT: lui s2, 6 +; REMAT-NEXT: addiw s2, s2, 512 +; REMAT-NEXT: add a2, a0, s2 ; REMAT-NEXT: vle32.v v0, (a2) ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v26 -; REMAT-NEXT: lui s0, 6 -; REMAT-NEXT: addiw s0, s0, 1536 -; REMAT-NEXT: add a2, a0, s0 +; REMAT-NEXT: li s1, 25 +; REMAT-NEXT: slli s1, s1, 10 +; REMAT-NEXT: add a2, a0, s1 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v28 -; REMAT-NEXT: li t6, 13 -; REMAT-NEXT: slli t6, t6, 11 -; REMAT-NEXT: add a2, a0, t6 +; REMAT-NEXT: lui s0, 6 +; REMAT-NEXT: addiw s0, s0, 1536 +; REMAT-NEXT: add a2, a0, s0 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v30 -; REMAT-NEXT: lui t5, 7 -; REMAT-NEXT: addiw t5, t5, -1536 -; REMAT-NEXT: add a2, a0, t5 +; REMAT-NEXT: li t6, 13 +; REMAT-NEXT: slli t6, t6, 11 +; REMAT-NEXT: add a2, a0, t6 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v10, v6 -; REMAT-NEXT: li t4, 27 -; REMAT-NEXT: slli t4, t4, 10 -; REMAT-NEXT: add a2, a0, t4 +; REMAT-NEXT: lui t5, 7 +; REMAT-NEXT: addiw t5, t5, -1536 +; REMAT-NEXT: add a2, a0, t5 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v12, v4 -; REMAT-NEXT: lui t3, 7 -; REMAT-NEXT: addiw t3, t3, -512 -; REMAT-NEXT: add a2, a0, t3 +; REMAT-NEXT: li t4, 27 +; REMAT-NEXT: slli t4, t4, 10 +; REMAT-NEXT: add a2, a0, t4 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v14, v2 +; REMAT-NEXT: lui t3, 7 +; REMAT-NEXT: addiw t3, t3, -512 +; REMAT-NEXT: add a2, a0, t3 +; REMAT-NEXT: vle32.v v2, (a2) +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0 ; REMAT-NEXT: lui t2, 7 ; REMAT-NEXT: add a2, a0, t2 -; REMAT-NEXT: vle32.v v2, (a2) +; REMAT-NEXT: vle32.v 
v0, (a2) ; REMAT-NEXT: vle32.v v8, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0 +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26 ; REMAT-NEXT: lui t1, 7 ; REMAT-NEXT: addiw t1, t1, 512 ; REMAT-NEXT: add a2, a0, t1 -; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: vle32.v v16, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26 +; REMAT-NEXT: vle32.v v18, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28 ; REMAT-NEXT: li t0, 29 ; REMAT-NEXT: slli t0, t0, 10 ; REMAT-NEXT: add a2, a0, t0 -; REMAT-NEXT: vle32.v v18, (a2) +; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: vle32.v v26, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28 +; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30 ; REMAT-NEXT: lui a7, 7 ; REMAT-NEXT: addiw a7, a7, 1536 ; REMAT-NEXT: add a2, a0, a7 -; REMAT-NEXT: vle32.v v20, (a2) +; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: vle32.v v28, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30 +; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6 ; REMAT-NEXT: li a6, 15 ; REMAT-NEXT: slli a6, a6, 11 ; REMAT-NEXT: add a2, a0, a6 -; REMAT-NEXT: vle32.v v22, (a2) +; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: vle32.v v30, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6 +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4 ; REMAT-NEXT: lui a5, 8 ; REMAT-NEXT: addiw a5, a5, -1536 ; REMAT-NEXT: add a2, a0, a5 -; REMAT-NEXT: vle32.v v24, (a2) +; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: vle32.v v6, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4 +; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2 ; REMAT-NEXT: li a4, 31 ; REMAT-NEXT: slli a4, a4, 10 ; REMAT-NEXT: add a2, a0, a4 -; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: vle32.v v4, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2 +; REMAT-NEXT: sf.vc.vv 3, 0, v14, v0 ; REMAT-NEXT: lui a3, 8 ; REMAT-NEXT: addiw a3, a3, -512 ; REMAT-NEXT: add a2, a0, a3 -; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: lui a2, 8 ; REMAT-NEXT: add a0, a0, a2 ; REMAT-NEXT: vle32.v v0, (a0) -; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v18 -; REMAT-NEXT: sf.vc.vv 3, 0, v26, v20 -; REMAT-NEXT: sf.vc.vv 3, 0, v28, v22 -; REMAT-NEXT: sf.vc.vv 3, 0, v30, v24 -; REMAT-NEXT: sf.vc.vv 3, 0, v6, v10 -; REMAT-NEXT: sf.vc.vv 3, 0, v4, v12 +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v16 +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v20 +; REMAT-NEXT: sf.vc.vv 3, 0, v26, v22 +; REMAT-NEXT: sf.vc.vv 3, 0, v28, v24 +; REMAT-NEXT: sf.vc.vv 3, 0, v30, v10 +; REMAT-NEXT: sf.vc.vv 3, 0, v6, v12 +; REMAT-NEXT: sf.vc.vv 3, 0, v4, v14 ; REMAT-NEXT: sf.vc.vv 3, 0, v2, v0 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; REMAT-NEXT: addi a0, a1, 1024 diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll index 81ef6072449e8..c92ba98dcc338 100644 --- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -43,7 +43,6 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: call func -; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -55,6 +54,7 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; CHECK-NEXT: vfwsub.wv v8, v24, v16 ; CHECK-NEXT: 
vsetvli zero, zero, e32, m8, tu, mu ; CHECK-NEXT: vfdiv.vv v8, v24, v8, v0.t @@ -99,7 +99,6 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; SUBREGLIVENESS-NEXT: call func -; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; SUBREGLIVENESS-NEXT: csrr a0, vlenb ; SUBREGLIVENESS-NEXT: slli a0, a0, 3 ; SUBREGLIVENESS-NEXT: add a0, sp, a0 @@ -111,6 +110,7 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # Unknown-size Folded Reload ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; SUBREGLIVENESS-NEXT: vfwsub.wv v8, v24, v16 ; SUBREGLIVENESS-NEXT: vsetvli zero, zero, e32, m8, tu, mu ; SUBREGLIVENESS-NEXT: vfdiv.vv v8, v24, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll index eb74e2d302f1a..05d6716e47192 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -590,13 +590,12 @@ define @vp_abs_nxv16i64( %va, @access_fixed_and_vector_objects(ptr %val) { ; RV64IV-NEXT: addi a0, sp, 8 ; RV64IV-NEXT: vl1re64.v v8, (a0) ; RV64IV-NEXT: addi a0, sp, 528 -; RV64IV-NEXT: ld a1, 520(sp) ; RV64IV-NEXT: vl1re64.v v9, (a0) -; RV64IV-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64IV-NEXT: ld a0, 520(sp) +; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV64IV-NEXT: vadd.vv v8, v8, v9 ; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 139579b3d2a36..9cb3991f31f94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -103,9 +103,9 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) { define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 @@ -124,31 +124,30 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 -; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 4 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2) ; CHECK-NEXT: addi 
a0, a0, %lo(.LCPI9_2) +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsext.vf8 v16, v8 +; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma @@ -174,51 +173,48 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v8, v16, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 -; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v11, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v9, v16, a2 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 6 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: lui a0, %hi(.LCPI10_4) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsext.vf8 v16, v11 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v11, v16, a2 ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v0, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 2 +; CHECK-NEXT: lui a0, %hi(.LCPI10_4) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vle8.v v13, (a0) +; CHECK-NEXT: vsext.vf8 v16, v12 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v12, v16, a2 +; CHECK-NEXT: vsext.vf8 v16, v13 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v13, v16, a2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 4 +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v12, 2 +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vi v0, v13, 4 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v9, v16, a2 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 879dff4a6e490..5217148ba4f4e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1449,27 +1449,27 @@ define @vp_bitreverse_nxv1i64( %va, @vp_bitreverse_nxv1i64_unmasked( %va ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1733,27 +1733,27 @@ define @vp_bitreverse_nxv2i64( %va, @vp_bitreverse_nxv2i64_unmasked( %va ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -2017,13 +2017,13 @@ define @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv4i64_unmasked( %va ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, 
zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -2311,20 +2311,23 @@ define @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64_unmasked( %va ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -2669,20 +2672,23 @@ define @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64_unmasked( %va ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -3056,13 +3062,13 @@ define @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @llvm.vp.bitreverse.nxv64i16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 800d06c5a78f5..aadd9852af11e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -525,27 +525,27 @@ define @vp_bswap_nxv1i64( %va, @vp_bswap_nxv1i64_unmasked( %va, i32 ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; 
RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -695,27 +695,27 @@ define @vp_bswap_nxv2i64( %va, @vp_bswap_nxv2i64_unmasked( %va, i32 ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -865,13 +865,13 @@ define @vp_bswap_nxv4i64( %va, @vp_bswap_nxv4i64_unmasked( %va, i32 ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv 
v8, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1045,20 +1045,23 @@ define @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64_unmasked( %va, i32 ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1288,20 +1291,23 @@ define @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64_unmasked( %va, i32 ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1539,13 +1545,13 @@ define @vp_bswap_nxv64i16( %va, @vp_bswap_nxv64i16( %va, @llvm.vp.bswap.nxv64i16( %va, %m, i32 %evl) @@ -1646,27 +1652,27 @@ define @vp_bswap_nxv1i48( %va, @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a3, a2, a1 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re32.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re32.v v0, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vl8re32.v v8, (a3) +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vl8re32.v v0, (a1) +; CHECK-NEXT: vl8re32.v v16, (a3) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v16, (a2) -; CHECK-NEXT: vadd.vv v0, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e32, 
m8, ta, ma +; CHECK-NEXT: vadd.vv v24, v8, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v24, v24, v8 +; CHECK-NEXT: vadd.vv v0, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v24, v8 -; CHECK-NEXT: vadd.vv v24, v0, v16 +; CHECK-NEXT: vadd.vv v8, v0, v8 +; CHECK-NEXT: vadd.vv v24, v24, v16 ; CHECK-NEXT: vadd.vx v16, v8, a4 ; CHECK-NEXT: vadd.vx v8, v24, a4 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index aa11e012af201..dec67721514de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -135,16 +135,16 @@ declare @llvm.vp.ceil.nxv8f16(, @vp_ceil_vv_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -179,16 +179,16 @@ declare @llvm.vp.ceil.nxv16f16(, @vp_ceil_vv_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -223,16 +223,16 @@ declare @llvm.vp.ceil.nxv32f16(, @vp_ceil_vv_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -359,8 +359,8 @@ define @vp_ceil_vv_nxv4f32( %va, @vp_ceil_vv_nxv8f32( %va, @vp_ceil_vv_nxv16f32( %va, @llvm.vp.ceil.nxv2f64(, @vp_ceil_vv_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, 
%lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -569,16 +569,16 @@ declare @llvm.vp.ceil.nxv4f64(, @vp_ceil_vv_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -613,16 +613,16 @@ declare @llvm.vp.ceil.nxv7f64(, @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -657,16 +657,16 @@ declare @llvm.vp.ceil.nxv8f64(, @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -705,66 +705,56 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 
; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll index ed434deea1a83..482cf83d540c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-extract-crash.ll @@ -10,19 +10,19 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV32-NEXT: th.lwd a2, a3, (a0), 0, 3 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v8, (a2) -; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vzext.vf4 v12, v8 ; RV32-NEXT: vmv.s.x v8, zero -; RV32-NEXT: vredsum.vs v10, v12, v8 -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredsum.vs v9, v12, v8 +; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: th.swia a0, (a1), 4, 0 ; RV32-NEXT: vsetivli zero, 4, 
e8, mf4, ta, ma -; RV32-NEXT: vle8.v v10, (a3) +; RV32-NEXT: vle8.v v9, (a3) +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 4 +; RV32-NEXT: vslideup.vi v9, v10, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vzext.vf4 v12, v10 +; RV32-NEXT: vzext.vf4 v12, v9 ; RV32-NEXT: vredsum.vs v8, v12, v8 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a1) @@ -33,19 +33,19 @@ define void @test(ptr %ref_array, ptr %sad_array) { ; RV64-NEXT: th.ldd a2, a3, (a0), 0, 4 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v8, (a2) -; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vzext.vf4 v12, v8 ; RV64-NEXT: vmv.s.x v8, zero -; RV64-NEXT: vredsum.vs v10, v12, v8 -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vredsum.vs v9, v12, v8 +; RV64-NEXT: vmv.x.s a0, v9 ; RV64-NEXT: th.swia a0, (a1), 4, 0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vle8.v v10, (a3) +; RV64-NEXT: vle8.v v9, (a3) +; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v10, v9, 4 +; RV64-NEXT: vslideup.vi v9, v10, 4 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vzext.vf4 v12, v10 +; RV64-NEXT: vzext.vf4 v12, v9 ; RV64-NEXT: vredsum.vs v8, v12, v8 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll index 673008d9c0b3d..52811133c53f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll @@ -197,28 +197,51 @@ entry: define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data) { ; RV64-LABEL: test_compresstore_v256i8: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v7, v8 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 128 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vle8.v v24, (a1) +; RV64-NEXT: vle8.v v16, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vi v9, v0, 1 ; RV64-NEXT: vmv.x.s a1, v9 ; RV64-NEXT: vmv.x.s a3, v0 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 16 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vcompress.vm v8, v16, v0 +; RV64-NEXT: vcompress.vm v16, v24, v0 ; RV64-NEXT: vcpop.m a4, v0 ; RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: vse8.v v16, (a0) +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: vcompress.vm v8, v24, v7 -; RV64-NEXT: vcpop.m a2, v7 +; RV64-NEXT: vcompress.vm v16, v24, v8 +; RV64-NEXT: vcpop.m a2, v8 ; RV64-NEXT: cpop a3, a3 ; RV64-NEXT: cpop a1, a1 ; RV64-NEXT: add a0, a0, a3 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: vsetvli 
zero, a2, e8, m8, ta, ma -; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: vse8.v v16, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; RV32-LABEL: test_compresstore_v256i8: @@ -796,18 +819,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data) ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vse64.v v24, (a0) ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vslidedown.vi v8, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vcompress.vm v8, v16, v24 +; RV64-NEXT: vcompress.vm v24, v16, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vmv.x.s a1, v0 ; RV64-NEXT: zext.h a1, a1 ; RV64-NEXT: cpopw a1, a1 ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: vcpop.m a1, v24 +; RV64-NEXT: vcpop.m a1, v8 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vse64.v v24, (a0) ; RV64-NEXT: ret ; ; RV32-LABEL: test_compresstore_v32i64: @@ -818,18 +841,18 @@ define void @test_compresstore_v32i64(ptr %p, <32 x i1> %mask, <32 x i64> %data) ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vse64.v v24, (a0) ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: vslidedown.vi v8, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vcompress.vm v8, v16, v24 +; RV32-NEXT: vcompress.vm v24, v16, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vmv.x.s a1, v0 ; RV32-NEXT: zext.h a1, a1 ; RV32-NEXT: cpop a1, a1 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: vcpop.m a1, v24 +; RV32-NEXT: vcpop.m a1, v8 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: vse64.v v24, (a0) ; RV32-NEXT: ret entry: tail call void @llvm.masked.compressstore.v32i64(<32 x i64> %data, ptr align 8 %p, <32 x i1> %mask) diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll index bd65ed52be680..1343b64b876dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll @@ -189,16 +189,16 @@ define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) { ; RV32-LABEL: v4xi64_concat_vector_insert_idx3: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vle64.v v10, (a1) +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vle64.v v10, (a0) ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v9, v8, a2 ; RV32-NEXT: vslide1down.vx v9, v9, a3 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 1 +; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: vslideup.vi v10, v8, 2 +; RV32-NEXT: vse64.v v10, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: v4xi64_concat_vector_insert_idx3: diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll index 113154c0f9855..7839b602706db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll @@ -19,18 +19,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV32-LABEL: 
constant_folding_crash: ; RV32: # %bb.0: # %entry ; RV32-NEXT: lw a0, 8(a0) -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV32-NEXT: vmv.v.x v11, a0 -; RV32-NEXT: vmsne.vi v0, v11, 0 +; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vmsne.vi v10, v10, 0 +; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmv1r.v v0, v11 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: vrgather.vi v9, v8, 0 ; RV32-NEXT: vmsne.vi v0, v9, 0 @@ -42,18 +43,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV64-LABEL: constant_folding_crash: ; RV64: # %bb.0: # %entry ; RV64-NEXT: ld a0, 8(a0) -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vmv.v.x v13, a0 -; RV64-NEXT: vmsne.vi v0, v13, 0 +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vmsne.vi v12, v12, 0 +; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: vrgather.vi v9, v8, 0 ; RV64-NEXT: vmsne.vi v0, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index 41ec102c34efb..6e538f3dfb38e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -3341,16 +3341,16 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; ; RV32F-LABEL: ctlz_zero_undef_nxv8i64: ; RV32F: # %bb.0: -; RV32F-NEXT: vmv8r.v v16, v8 ; RV32F-NEXT: li a0, 190 ; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32F-NEXT: vmv.v.x v8, a0 +; RV32F-NEXT: vmv.v.x v16, a0 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v24, v16 -; RV32F-NEXT: vsrl.vi v16, v24, 23 -; RV32F-NEXT: vwsubu.wv v8, v8, v16 +; RV32F-NEXT: vfncvt.f.xu.w v24, v8 +; RV32F-NEXT: vsrl.vi v8, v24, 23 +; RV32F-NEXT: vwsubu.wv v16, v16, v8 ; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: vmv8r.v v8, v16 ; RV32F-NEXT: ret ; ; RV64F-LABEL: ctlz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 86086f5dc88f7..fff280c005b54 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -1259,8 +1259,8 @@ define @vp_ctlz_nxv16i64( %va, @vp_ctlz_nxv16i64( %va, @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -2487,8 +2487,8 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB94_2: ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t ; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t @@ -2512,8 +2512,8 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; CHECK-ZVBB-NEXT: # %bb.1: ; CHECK-ZVBB-NEXT: mv a0, a1 ; CHECK-ZVBB-NEXT: .LBB94_2: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; 
CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 883f68aec1f42..e3c53212e91b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2024,8 +2024,7 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @llvm.vp.ctpop.nxv16i64( %va, %m, i32 %evl) @@ -2375,13 +2378,13 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi a3, a3, 1365 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v0, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v24, v0 ; RV32-NEXT: vsub.vv v24, v16, v24 ; RV32-NEXT: lui a3, 209715 @@ -2404,20 +2407,20 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v16, v16, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 4a001662ce2ca..0ef0a431dabc4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -2282,7 +2282,6 @@ define @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @llvm.vp.cttz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -2628,98 +2626,97 @@ define @vp_cttz_nxv16i64_unmasked( %va, i ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v16, a2 ; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vand.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v16, v16, v8 +; RV32-NEXT: vsrl.vi v24, v16, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vsub.vv v16, v16, v24 ; 
RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vadd.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v16, v8 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v8, v16, a3 +; RV32-NEXT: vsrl.vx v8, v8, a3 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB47_2: +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vx v16, v24, a2 +; RV32-NEXT: vsub.vx v8, v24, a2 ; RV32-NEXT: vnot.v v24, v24 -; RV32-NEXT: vand.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 1 +; RV32-NEXT: vand.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vsrl.vx v8, v8, a3 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4038,13 +4035,12 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB94_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t ; CHECK-NEXT: fsrmi a0, 1 @@ -4077,8 +4073,8 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-ZVBB-NEXT: # %bb.1: ; CHECK-ZVBB-NEXT: mv a0, a1 ; CHECK-ZVBB-NEXT: .LBB94_2: -; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 498a633922ba2..14719e190a693 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -139,22 +139,22 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: addi a3, sp, 64 ; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a4, a0, a2 -; RV32-NEXT: vl8r.v v16, (a4) +; RV32-NEXT: add a3, a0, a2 +; RV32-NEXT: vl8r.v v16, (a3) ; RV32-NEXT: vl8r.v v24, (a0) -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: add a1, a0, a1 +; RV32-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; RV32-NEXT: vmseq.vi v8, v16, 0 ; RV32-NEXT: vmseq.vi v0, v24, 0 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 -; RV32-NEXT: vs8r.v v24, (a3) -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: vs8r.v v24, (a0) +; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 -; RV32-NEXT: vs8r.v v8, (a2) +; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload @@ -179,22 +179,22 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: addi a3, sp, 64 ; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a4, a0, a2 -; RV64-NEXT: vl8r.v v16, (a4) +; RV64-NEXT: add a3, a0, a2 +; RV64-NEXT: vl8r.v v16, (a3) ; RV64-NEXT: vl8r.v v24, (a0) -; RV64-NEXT: add a1, a3, a1 -; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: add a1, 
a0, a1 +; RV64-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; RV64-NEXT: vmseq.vi v8, v16, 0 ; RV64-NEXT: vmseq.vi v0, v24, 0 ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 -; RV64-NEXT: vs8r.v v24, (a3) -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: vs8r.v v24, (a0) +; RV64-NEXT: add a0, a0, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 -; RV64-NEXT: vs8r.v v8, (a2) +; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index 875f4f239028b..6b8d778bc3242 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -35,10 +35,10 @@ define <512 x i8> @single_source(<512 x i8> %a) { ; CHECK-NEXT: vslidedown.vi v16, v16, 4 ; CHECK-NEXT: li a0, 466 ; CHECK-NEXT: li a1, 465 +; CHECK-NEXT: lbu a2, 1012(sp) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma -; CHECK-NEXT: lbu a0, 1012(sp) ; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv.s.x v16, a2 ; CHECK-NEXT: li a0, 501 ; CHECK-NEXT: li a1, 500 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma @@ -118,16 +118,16 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: vslidedown.vi v24, v24, 4 ; CHECK-NEXT: li a1, 466 ; CHECK-NEXT: li a2, 465 +; CHECK-NEXT: lbu a3, 985(sp) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: lbu a1, 985(sp) ; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: vmv.s.x v24, a1 +; CHECK-NEXT: vmv.s.x v24, a3 ; CHECK-NEXT: li a1, 478 ; CHECK-NEXT: li a2, 477 +; CHECK-NEXT: lbu a3, 1012(sp) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: lbu a1, 1012(sp) ; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: vmv.s.x v24, a1 +; CHECK-NEXT: vmv.s.x v24, a3 ; CHECK-NEXT: li a1, 501 ; CHECK-NEXT: li a2, 500 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma @@ -137,21 +137,21 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: addi a1, a1, 501 ; CHECK-NEXT: slli a1, a1, 13 ; CHECK-NEXT: addi a1, a1, 512 +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: lui a2, 1047552 +; CHECK-NEXT: addiw a2, a2, 1 +; CHECK-NEXT: slli a2, a2, 23 +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: slli a2, a2, 18 +; CHECK-NEXT: vslide1down.vx v0, v24, a2 +; CHECK-NEXT: lui a2, 4 +; CHECK-NEXT: vmv.s.x v24, a2 ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetivli zero, 7, e64, m1, tu, ma +; CHECK-NEXT: vslideup.vi v0, v24, 6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a1 -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v7, 0 -; CHECK-NEXT: lui a1, 1047552 -; CHECK-NEXT: addiw a1, a1, 1 -; CHECK-NEXT: slli a1, a1, 23 -; CHECK-NEXT: addi a1, a1, 1 -; CHECK-NEXT: slli a1, a1, 18 -; CHECK-NEXT: vslide1down.vx v0, v7, a1 -; CHECK-NEXT: lui a1, 4 -; CHECK-NEXT: vmv.s.x v7, a1 -; CHECK-NEXT: vsetivli zero, 7, e64, m1, tu, ma -; CHECK-NEXT: vslideup.vi v0, v7, 6 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-NEXT: addi sp, s0, -1536 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll index c0d366760d079..f3e823562888f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll @@ -417,8 +417,8 @@ declare <32 x i64> @llvm.vp.abs.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_abs_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 @@ -432,8 +432,8 @@ define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t ; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 943fc58d637a0..068c25b821002 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -847,27 +847,27 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3, v0.t ; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vand.vx v11, v8, a4, v0.t +; RV32-NEXT: vsll.vi v11, v11, 24, v0.t +; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vor.vv v11, v11, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v11, v0.t +; RV32-NEXT: vsrl.vx v11, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t ; RV32-NEXT: vand.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vor.vv v11, v12, v11, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t ; RV32-NEXT: vand.vx v12, v12, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v8, v11, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 @@ -982,27 +982,27 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, 
v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1119,27 +1119,27 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: vand.vx v14, v8, a4, v0.t +; RV32-NEXT: vsll.vi v14, v14, 24, v0.t +; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vor.vv v14, v14, v16, v0.t +; RV32-NEXT: vor.vv v10, v10, v14, v0.t +; RV32-NEXT: vsrl.vx v14, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t ; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: vand.vx v16, v16, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v14, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 @@ -1254,27 +1254,27 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, 
v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1391,13 +1391,13 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3, v0.t ; RV32-NEXT: vor.vv v16, v12, v16, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vx v20, v8, a4, v0.t +; RV32-NEXT: vsll.vi v20, v20, 24, v0.t ; RV32-NEXT: vand.vv v24, v8, v12, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t @@ -1526,27 +1526,27 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1685,20 +1685,23 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t 
+; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 @@ -1711,10 +1714,10 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -1727,38 +1730,38 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 4, v0.t +; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vi v16, v16, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t @@ -1885,60 +1888,60 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), 
zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v0, v0, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -2049,20 +2052,23 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli 
a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 @@ -2075,10 +2081,10 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -2091,38 +2097,38 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 4, v0.t +; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsll.vi v16, 
v16, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t @@ -2249,60 +2255,60 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v0, v0, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vand.vv v8, 
v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -2388,8 +2394,8 @@ define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 @@ -2427,13 +2433,13 @@ define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a4 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a4 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index f80d4e5c0d7c3..1490738687322 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -295,27 +295,27 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3, v0.t ; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vand.vx v11, v8, a4, v0.t +; RV32-NEXT: vsll.vi v11, v11, 24, v0.t +; RV32-NEXT: vand.vv v12, v8, v10, v0.t ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: vor.vv v11, v11, v12, v0.t +; RV32-NEXT: vor.vv v9, v9, v11, v0.t +; RV32-NEXT: vsrl.vx v11, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t ; RV32-NEXT: vand.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vor.vv v11, v12, v11, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t ; RV32-NEXT: vand.vx v12, v12, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vor.vv v8, v8, v11, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -373,27 +373,27 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi 
v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v10, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v12, v8, v10 ; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsrl.vx v11, v8, a1 ; RV32-NEXT: vsrl.vx v12, v8, a3 ; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vor.vv v11, v12, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vand.vx v12, v12, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v11 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -453,27 +453,27 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3, v0.t ; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: vand.vx v14, v8, a4, v0.t +; RV32-NEXT: vsll.vi v14, v14, 24, v0.t +; RV32-NEXT: vand.vv v16, v8, v12, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: vor.vv v14, v14, v16, v0.t +; RV32-NEXT: vor.vv v10, v10, v14, v0.t +; RV32-NEXT: vsrl.vx v14, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t ; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: vand.vx v16, v16, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v14, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -531,27 +531,27 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v16, v8, v12 ; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; 
RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsrl.vx v14, v8, a1 ; RV32-NEXT: vsrl.vx v16, v8, a3 ; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v14, v16, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v14 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -611,13 +611,13 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3, v0.t ; RV32-NEXT: vor.vv v16, v12, v16, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v12, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vx v20, v8, a4, v0.t +; RV32-NEXT: vsll.vi v20, v20, 24, v0.t ; RV32-NEXT: vand.vv v24, v8, v12, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t ; RV32-NEXT: vor.vv v20, v20, v24, v0.t @@ -689,27 +689,27 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vx v20, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a3 ; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v20, v24, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v20 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -779,20 +779,23 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # 
Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -805,10 +808,10 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -913,13 +916,13 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1010,20 +1013,23 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v16, v24, 8, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -1036,10 +1042,10 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, 
a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -1144,13 +1150,13 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: addi a4, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vx v0, v8, a4 +; RV32-NEXT: vsll.vi v0, v0, 24 ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vor.vv v24, v0, v24 @@ -1228,8 +1234,8 @@ define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -1246,13 +1252,13 @@ define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t ; CHECK-NEXT: vor.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index af7d7f7ae755b..65a1035fd815c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -567,13 +567,14 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v12, (a0) ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vmv.s.x v10, a3 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) -; CHECK-NEXT: vle32.v v12, (a0) ; CHECK-NEXT: vslideup.vi v8, v10, 7 ; CHECK-NEXT: 
vadd.vv v8, v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index 3e2af7e8267b9..befbfb88550ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 3 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli 
zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: 
vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 2f4539d5038c2..b42fb8c686164 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1503,24 +1503,28 @@ declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; 
RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1535,37 +1539,60 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, 
v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64: @@ -1655,33 +1682,29 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1743,24 +1766,28 @@ declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi 
v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1775,37 +1802,60 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v16i64: @@ -1895,33 +1945,29 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; 
RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1991,7 +2037,7 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -2035,111 +2081,145 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul 
a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: 
vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2147,13 +2227,13 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -2171,18 +2251,18 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -2191,17 +2271,35 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, 
a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -2211,7 +2309,7 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2219,21 +2317,21 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -2257,8 +2355,8 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 @@ -2315,13 +2413,13 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vor.vv v16, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t @@ -2364,10 +2462,14 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 
48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -2391,74 +2493,8 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 @@ -2467,41 +2503,84 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsrl.vx v24, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: 
vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vnot.v v0, v8 +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vx v0, v24, a2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; 
RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -4060,24 +4139,28 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4092,37 +4175,60 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), 
zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v15i64: @@ -4212,33 +4318,29 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -4298,24 +4400,28 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; 
RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4330,37 +4436,60 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v24, v8, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v16i64: @@ -4450,33 +4579,29 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; 
RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -4544,7 +4669,7 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -4588,111 +4713,145 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; 
RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; 
RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4700,13 +4859,13 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t @@ -4724,18 +4883,18 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4744,17 +4903,35 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, 
a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -4764,7 +4941,7 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4772,21 +4949,21 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4810,8 +4987,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB70_2 @@ -4868,13 +5045,13 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vor.vv v16, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t @@ -4917,10 +5094,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; 
RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -4944,74 +5125,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB71_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 @@ -5020,41 +5135,84 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 
zeroex ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsrl.vx v24, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vnot.v v0, v8 +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v0, v24, 16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vx v0, v24, a2 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: 
vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 0b6d8b33394d5..5fceab869ab85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1119,55 +1119,93 @@ declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32) define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: 
addi a1, sp, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v15i64: @@ -1228,34 +1266,29 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1303,55 +1336,93 @@ declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32) define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: 
addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v16i64: @@ -1412,34 +1483,29 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1495,11 +1561,16 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 @@ -1524,74 +1595,93 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a2, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul 
a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v8, v16, v0.t -; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: vadd.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vsub.vv v24, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v24, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: addi a2, sp, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; 
RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t ; RV32-NEXT: csrr a2, vlenb @@ -1603,14 +1693,13 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -1625,20 +1714,37 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -1666,8 +1772,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; 
RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 @@ -1710,13 +1816,13 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a6 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a6 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 16 +; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v16, v8, v16, v0.t @@ -1746,12 +1852,11 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1777,97 +1882,67 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a2, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a2), zero -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsub.vv v8, v8, v0 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v0, v8 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: addi a2, sp, 48 ; RV32-NEXT: 
vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v16, v8 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v16, a1 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: vsub.vv v16, v8, v16 -; RV32-NEXT: vand.vv v8, v16, v0 +; RV32-NEXT: vand.vv v0, v16, v24 ; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vadd.vv v16, v0, v16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 4 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v16, v0 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index f2926fa91e5c2..e7736e7f360f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1263,59 
+1263,86 @@ declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; 
RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v15i64: @@ -1385,33 +1412,29 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1463,59 +1486,86 @@ declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; 
RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v16i64: @@ -1585,33 +1635,29 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -1671,7 +1717,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1705,111 +1751,145 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; 
RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 
+; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -1817,13 +1897,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v16, a1, v0.t ; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v8, v16, v8, v0.t @@ -1831,18 +1911,18 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1851,17 +1931,35 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: 
addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -1871,7 +1969,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1879,21 +1977,21 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1917,8 +2015,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a1, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB34_2 @@ -1965,13 +2063,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -2004,10 +2102,14 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; 
RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -2032,96 +2134,73 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: vsub.vx v24, v8, a2 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 ; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v24, v0, a2 +; RV32-NEXT: vnot.v v0, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, 
ma -; RV32-NEXT: vsub.vx v8, v24, a2 -; RV32-NEXT: vnot.v v24, v24 -; RV32-NEXT: vand.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -3420,59 +3499,86 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, 
v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: @@ -3542,33 +3648,29 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; 
RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -3618,59 +3720,86 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vl8r.v v24, 
(a1) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64: @@ -3740,33 +3869,29 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 ; RV32-NEXT: addi sp, sp, 32 @@ -3824,7 +3949,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, 
vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -3858,111 +3983,145 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, 
v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v16, v8, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -3970,13 +4129,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) 
# Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v16, a1, v0.t ; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v8, v16, v8, v0.t @@ -3984,18 +4143,18 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4004,17 +4163,35 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 @@ -4024,7 +4201,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -4032,21 +4209,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: 
vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4070,8 +4247,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a1, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: bltu a0, a1, .LBB70_2 @@ -4118,13 +4295,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a7, vlenb +; RV64-NEXT: slli a7, a7, 3 +; RV64-NEXT: add a7, sp, a7 +; RV64-NEXT: addi a7, a7, 16 +; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t @@ -4157,10 +4334,14 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -4185,96 +4366,73 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: .LBB71_2: ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: vsub.vx v24, v8, a2 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; 
RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 4 +; RV32-NEXT: vadd.vv v24, v24, v0 ; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v24, v0, a2 +; RV32-NEXT: vnot.v v0, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v24, v16, v8 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v24, a2 -; RV32-NEXT: vnot.v v24, v24 -; RV32-NEXT: vand.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, 
v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v0, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v24, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 1587f770f87ca..9f8de22b25c2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -15,16 +15,16 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vadd.vv v11, v9, v9 -; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t ; CHECK-NEXT: vmsne.vi v9, v9, 0 ; CHECK-NEXT: vadd.vi v12, v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index dccb62877af3c..386c71cf665ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -326,9 +326,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: andi a1, a1, 255 ; RV32-NEXT: li a2, 128 +; RV32-NEXT: addi a3, a0, 128 ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle8.v v16, (a2) +; RV32-NEXT: vle8.v v16, (a3) ; RV32-NEXT: vle8.v v24, (a0) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: add a1, a0, a1 @@ -357,9 +357,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: andi a1, a1, 255 ; RV64-NEXT: li a2, 128 +; RV64-NEXT: addi a3, a0, 128 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64-NEXT: addi a2, a0, 128 -; RV64-NEXT: vle8.v v16, (a2) +; RV64-NEXT: vle8.v v16, (a3) ; RV64-NEXT: vle8.v v24, (a0) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: add a1, a0, a1 @@ -388,9 +388,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: andi sp, sp, -128 ; RV32ZBS-NEXT: andi a1, a1, 255 ; RV32ZBS-NEXT: li a2, 128 +; RV32ZBS-NEXT: addi a3, a0, 128 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV32ZBS-NEXT: addi a2, a0, 128 -; RV32ZBS-NEXT: vle8.v v16, (a2) +; RV32ZBS-NEXT: vle8.v v16, (a3) ; RV32ZBS-NEXT: vle8.v v24, (a0) ; RV32ZBS-NEXT: mv a0, sp ; RV32ZBS-NEXT: add a1, a0, a1 @@ -419,9 +419,9 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: andi sp, sp, -128 ; RV64ZBS-NEXT: andi a1, a1, 255 ; 
RV64ZBS-NEXT: li a2, 128 +; RV64ZBS-NEXT: addi a3, a0, 128 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; RV64ZBS-NEXT: addi a2, a0, 128 -; RV64ZBS-NEXT: vle8.v v16, (a2) +; RV64ZBS-NEXT: vle8.v v16, (a3) ; RV64ZBS-NEXT: vle8.v v24, (a0) ; RV64ZBS-NEXT: mv a0, sp ; RV64ZBS-NEXT: add a1, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 0237c1867ebba..d309da6df7dc7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -710,9 +710,9 @@ define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32-NEXT: andi a1, a1, 63 ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: li a2, 32 +; RV32-NEXT: addi a3, a0, 128 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: vle32.v v16, (a0) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: add a1, a0, a1 @@ -738,9 +738,9 @@ define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64-NEXT: andi a1, a1, 63 ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: li a2, 32 +; RV64-NEXT: addi a3, a0, 128 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV64-NEXT: addi a2, a0, 128 -; RV64-NEXT: vle32.v v8, (a2) +; RV64-NEXT: vle32.v v8, (a3) ; RV64-NEXT: vle32.v v16, (a0) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: add a1, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 287dd510674d1..c1b4c5fda6c64 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 2 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; 
CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; 
CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, 
v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index edb33158e32eb..51eb63f5f9221 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -177,8 +177,8 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 ; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t @@ -253,8 +253,8 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t @@ -608,7 +608,6 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) @@ -618,28 +617,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 ; CHECK-NEXT: # 
%bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a0, vlenb @@ -666,13 +665,13 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb @@ -759,9 +758,9 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 48649c43f782a..03e0ac42c442c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -177,8 +177,8 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v10 ; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t @@ -253,8 +253,8 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t @@ -608,7 +608,6 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) @@ -618,28 +617,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, 
ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a0, vlenb @@ -666,13 +665,13 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: csrr a0, vlenb @@ -759,9 +758,9 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 9e83efd351953..379a51f4eee30 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -39,9 +39,9 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, ; CHECK-NEXT: vmul.vx v14, v12, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vadd.vi v8, v14, -14 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vadd.vi v8, v14, -14 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v12 @@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = 
insertelement <8 x float> poison, float %e0, i64 0 @@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index ed152e64a91ef..f3b124aa34dcb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -56,9 +56,9 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vmv.v.i v0, 10 +; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -261,13 +261,13 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 ; V128-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v16, v8 ; V128-NEXT: vwmaccu.vx v0, a0, v8 ; V128-NEXT: vmv8r.v v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 5886653a94b7c..45c0a22b1939f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -92,12 +92,11 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 ; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -108,13 +107,13 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) define <4 x double> 
@vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vrsub.vi v12, v12, 4 +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vrsub.vi v12, v12, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -128,12 +127,12 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vlse64.v v10, (a0), zero ; CHECK-NEXT: li a0, 3 -; CHECK-NEXT: lui a1, %hi(.LCPI8_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI8_0) -; CHECK-NEXT: vlse64.v v10, (a1), zero -; CHECK-NEXT: vmul.vx v12, v12, a0 ; CHECK-NEXT: vmv.v.i v0, 3 +; CHECK-NEXT: vmul.vx v12, v12, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 0f003d7af6100..d25312268ada6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1199,7 +1199,7 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; ZVFH-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; ZVFH-NEXT: vle32.v v8, (a1) ; ZVFH-NEXT: vle16.v v9, (a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index 6320b07125bb0..bc46e7d264bc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -351,25 +351,23 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa3, v8 -; RV32-NEXT: feq.d a0, fa3, fa3 +; RV32-NEXT: feq.d a2, fa3, fa3 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 -; RV32-NEXT: fcvt.w.d a2, fa3, rtz +; RV32-NEXT: fcvt.w.d a3, fa3, rtz ; RV32-NEXT: fld fa3, 40(sp) -; RV32-NEXT: neg a0, a0 -; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: neg a0, a2 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: feq.d a2, fa3, fa3 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a3, fa3, rtz ; RV32-NEXT: fld fa3, 32(sp) -; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: neg a0, a2 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: feq.d a2, fa3, fa3 @@ -395,8 +393,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa5, fa5, fa4 ; RV32-NEXT: fcvt.w.d a2, fa5, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: 
vslide1down.vx v9, v9, a0 ; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -452,25 +450,23 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vfmv.f.s fa3, v8 -; RV64-NEXT: feq.d a0, fa3, fa3 +; RV64-NEXT: feq.d a2, fa3, fa3 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 -; RV64-NEXT: fcvt.l.d a2, fa3, rtz +; RV64-NEXT: fcvt.l.d a3, fa3, rtz ; RV64-NEXT: fld fa3, 40(sp) -; RV64-NEXT: neg a0, a0 -; RV64-NEXT: and a0, a0, a2 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: neg a0, a2 +; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: feq.d a2, fa3, fa3 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a3, fa3, rtz ; RV64-NEXT: fld fa3, 32(sp) -; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: neg a0, a2 ; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: feq.d a2, fa3, fa3 @@ -496,8 +492,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa5, fa5, fa4 ; RV64-NEXT: fcvt.l.d a2, fa5, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vmv.v.i v0, 15 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 @@ -542,46 +538,43 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a2, fa4, rtz -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.v.x v10, a2 -; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v8, 2 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vfmv.f.s fa4, v10 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 -; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: fcvt.wu.d a3, fa4, rtz ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa4, v8 -; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fld fa2, 40(sp) -; RV32-NEXT: fmin.d fa4, fa4, fa5 -; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: fld fa4, 32(sp) -; RV32-NEXT: fmax.d fa2, fa2, fa3 -; RV32-NEXT: fmin.d fa2, fa2, fa5 -; RV32-NEXT: fcvt.wu.d a2, fa2, rtz ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 -; RV32-NEXT: fld fa2, 48(sp) -; RV32-NEXT: fcvt.wu.d a3, fa4, rtz +; RV32-NEXT: fcvt.wu.d a4, fa4, rtz +; RV32-NEXT: fmax.d fa4, fa2, fa3 +; RV32-NEXT: fld fa2, 32(sp) +; RV32-NEXT: fmin.d fa4, fa4, fa5 +; RV32-NEXT: fcvt.wu.d a5, fa4, rtz ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV32-NEXT: vslide1down.vx v8, v10, a0 +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: fmax.d fa4, fa2, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 +; RV32-NEXT: fcvt.wu.d a2, fa4, rtz +; RV32-NEXT: fld fa4, 48(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a4 +; RV32-NEXT: fmax.d fa4, fa4, fa3 +; RV32-NEXT: fmin.d fa4, fa4, 
fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz ; RV32-NEXT: fld fa4, 56(sp) -; RV32-NEXT: vmv.v.x v9, a3 -; RV32-NEXT: vslide1down.vx v9, v9, a2 +; RV32-NEXT: vmv.v.x v9, a2 +; RV32-NEXT: vslide1down.vx v9, v9, a5 ; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa5, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa5, rtz -; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v9, v9, a0 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -618,46 +611,43 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a2, fa4, rtz -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.v.x v10, a2 -; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 2 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vslidedown.vi v10, v8, 2 +; RV64-NEXT: vfmv.f.s fa4, v10 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 -; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: fcvt.lu.d a3, fa4, rtz ; RV64-NEXT: vslidedown.vi v8, v8, 3 ; RV64-NEXT: vfmv.f.s fa4, v8 -; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fld fa2, 40(sp) -; RV64-NEXT: fmin.d fa4, fa4, fa5 -; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: fld fa4, 32(sp) -; RV64-NEXT: fmax.d fa2, fa2, fa3 -; RV64-NEXT: fmin.d fa2, fa2, fa5 -; RV64-NEXT: fcvt.lu.d a2, fa2, rtz ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 -; RV64-NEXT: fld fa2, 48(sp) -; RV64-NEXT: fcvt.lu.d a3, fa4, rtz +; RV64-NEXT: fcvt.lu.d a4, fa4, rtz +; RV64-NEXT: fmax.d fa4, fa2, fa3 +; RV64-NEXT: fld fa2, 32(sp) +; RV64-NEXT: fmin.d fa4, fa4, fa5 +; RV64-NEXT: fcvt.lu.d a5, fa4, rtz ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; RV64-NEXT: vslide1down.vx v8, v10, a0 +; RV64-NEXT: vmv.v.x v8, a2 ; RV64-NEXT: fmax.d fa4, fa2, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 +; RV64-NEXT: fcvt.lu.d a2, fa4, rtz +; RV64-NEXT: fld fa4, 48(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a3 +; RV64-NEXT: vslide1down.vx v8, v8, a4 +; RV64-NEXT: fmax.d fa4, fa4, fa3 +; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz ; RV64-NEXT: fld fa4, 56(sp) -; RV64-NEXT: vmv.v.x v9, a3 -; RV64-NEXT: vslide1down.vx v9, v9, a2 +; RV64-NEXT: vmv.v.x v9, a2 +; RV64-NEXT: vslide1down.vx v9, v9, a5 ; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa5, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa5, rtz -; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vmv.v.i v0, 15 +; RV64-NEXT: vslide1down.vx v9, v9, a0 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll index 48cc3f17a6269..f195eeadf0274 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -96,8 +96,8 @@ declare <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float>, <32 x i1>, i32) define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; 
CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 @@ -112,8 +112,8 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 16 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll index 49a1b19b58a27..a4050b716e787 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -394,8 +394,8 @@ declare <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -408,8 +408,8 @@ define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll index d44efa2f6133f..b652cdd88c7c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -394,8 +394,8 @@ declare <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -408,8 +408,8 @@ define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index d890bf5412f9f..920eed322363a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -98,8 +98,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 ; CHECK-LABEL: vfptrunc_v32f32_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 @@ -112,8 +112,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 53de1a8755355..e81f686a28303 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -277,14 +277,14 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) { define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { ; VLA-LABEL: insert_v8i32_v2i32_2: ; VLA: # %bb.0: -; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; VLA-NEXT: vle32.v v8, (a1) ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vle32.v v10, (a0) +; VLA-NEXT: vle32.v v8, (a0) +; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; VLA-NEXT: vle32.v v10, (a1) ; VLA-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; VLA-NEXT: vslideup.vi v10, v8, 2 +; VLA-NEXT: vslideup.vi v8, v10, 2 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vse32.v v10, (a0) +; VLA-NEXT: vse32.v v8, (a0) ; VLA-NEXT: ret ; ; VLS-LABEL: insert_v8i32_v2i32_2: @@ -306,12 +306,13 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { ; VLA-LABEL: insert_v8i32_v2i32_6: ; VLA: # %bb.0: +; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vle32.v v8, (a0) ; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; VLA-NEXT: vle32.v v8, (a1) +; VLA-NEXT: vle32.v v10, (a1) ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vle32.v v10, (a0) -; VLA-NEXT: vslideup.vi v10, v8, 6 -; VLA-NEXT: vse32.v v10, (a0) +; VLA-NEXT: vslideup.vi v8, v10, 6 +; VLA-NEXT: vse32.v v8, (a0) ; VLA-NEXT: ret ; ; VLS-LABEL: insert_v8i32_v2i32_6: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 4954827876c19..776a1e9bab6b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -533,11 +533,11 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: vle64.v v12, (a1) +; CHECK-NEXT: li a1, 6 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a2 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vle64.v v12, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 4f4f0a09de748..4a5d37b2a85a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -669,13 +669,14 @@ define void @buildvec_seq_v9i8(ptr %x) { ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; 
CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 3 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: li a1, 146 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v9, 2, v0 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret @@ -1214,11 +1215,11 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; CHECK-NEXT: vslide1down.vx v8, v8, t4 ; CHECK-NEXT: vslide1down.vx v8, v8, t5 ; CHECK-NEXT: vslide1down.vx v8, v8, t6 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: li a1, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -1308,11 +1309,11 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; CHECK-NEXT: vslide1down.vx v8, v8, t4 ; CHECK-NEXT: vslide1down.vx v8, v8, t5 ; CHECK-NEXT: vslide1down.vx v8, v8, t6 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: li a1, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -1488,11 +1489,11 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; CHECK-NEXT: vslide1down.vx v8, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 ; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p4 = getelementptr i8, ptr %p, i32 31 @@ -1553,11 +1554,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; CHECK-NEXT: vslide1down.vx v8, v8, a7 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vslide1down.vx v8, v8, t0 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: li a1, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t ; CHECK-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll index 4509642fdef17..e0c676788dccc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll @@ -828,112 +828,104 @@ define i64 @explode_8xi64(<8 x i64> %v) { define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-LABEL: explode_16xi64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: .cfi_def_cfa_offset 64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 
4-byte Folded Spill -; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: .cfi_offset s4, -24 -; RV32-NEXT: .cfi_offset s5, -28 -; RV32-NEXT: .cfi_offset s6, -32 -; RV32-NEXT: .cfi_offset s7, -36 -; RV32-NEXT: .cfi_offset s8, -40 -; RV32-NEXT: .cfi_offset s9, -44 -; RV32-NEXT: .cfi_offset s10, -48 -; RV32-NEXT: .cfi_offset s11, -52 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 0(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: .cfi_offset s3, -16 +; RV32-NEXT: .cfi_offset s4, -20 +; RV32-NEXT: .cfi_offset s5, -24 +; RV32-NEXT: .cfi_offset s6, -28 +; RV32-NEXT: .cfi_offset s7, -32 +; RV32-NEXT: .cfi_offset s8, -36 +; RV32-NEXT: .cfi_offset s9, -40 +; RV32-NEXT: .cfi_offset s10, -44 +; RV32-NEXT: .cfi_offset s11, -48 ; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v8, 2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s a0, v24 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sw a1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: vslidedown.vi v16, v8, 3 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s a1, v24 -; RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vsrl.vx v24, v16, a0 +; RV32-NEXT: vmv.x.s a3, v24 ; RV32-NEXT: vmv.x.s a4, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 4 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s a5, v24 ; RV32-NEXT: vmv.x.s a6, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 5 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s a7, v24 ; RV32-NEXT: vmv.x.s t0, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 6 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s t1, v24 ; RV32-NEXT: vmv.x.s t2, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 7 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s t3, v24 ; RV32-NEXT: vmv.x.s t4, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 8 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s t5, v24 ; RV32-NEXT: vmv.x.s t6, v16 ; 
RV32-NEXT: vslidedown.vi v16, v8, 9 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s0, v24 ; RV32-NEXT: vmv.x.s s1, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 10 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s2, v24 ; RV32-NEXT: vmv.x.s s3, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 11 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s4, v24 ; RV32-NEXT: vmv.x.s s5, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 12 -; RV32-NEXT: vsrl.vx v24, v16, a3 +; RV32-NEXT: vsrl.vx v24, v16, a0 ; RV32-NEXT: vmv.x.s s6, v24 ; RV32-NEXT: vmv.x.s s7, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 13 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s s8, v24 -; RV32-NEXT: vmv.x.s s9, v16 +; RV32-NEXT: vsrl.vx v24, v16, a0 +; RV32-NEXT: vmv.x.s s9, v24 +; RV32-NEXT: vmv.x.s s8, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 14 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s s10, v24 -; RV32-NEXT: vmv.x.s s11, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 15 -; RV32-NEXT: vsrl.vx v24, v16, a3 -; RV32-NEXT: vmv.x.s ra, v24 -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: vsrl.vx v24, v16, a0 +; RV32-NEXT: vmv.s.x v17, zero ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vredxor.vs v8, v8, v9 +; RV32-NEXT: vredxor.vs v17, v8, v17 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 15 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a3 -; RV32-NEXT: vmv.x.s a3, v9 -; RV32-NEXT: add a3, a3, a0 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: sltu a1, a0, a1 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: add a4, a0, a4 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: vsrl.vx v18, v17, a0 +; RV32-NEXT: vmv.x.s s10, v18 +; RV32-NEXT: vmv.x.s s11, v17 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v0, v8, a0 +; RV32-NEXT: add a1, s10, a1 +; RV32-NEXT: add a2, s11, a2 +; RV32-NEXT: sltu a0, a2, s11 ; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a4, a2, a4 +; RV32-NEXT: sltu a1, a4, a2 +; RV32-NEXT: add a1, a1, a5 +; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a6, a4, a6 ; RV32-NEXT: sltu a1, a6, a4 ; RV32-NEXT: add a1, a1, a7 @@ -968,33 +960,36 @@ define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add s7, s5, s7 ; RV32-NEXT: sltu a1, s7, s5 -; RV32-NEXT: add a1, a1, s8 +; RV32-NEXT: add a1, a1, s9 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add s9, s7, s9 -; RV32-NEXT: sltu a1, s9, s7 -; RV32-NEXT: add a1, a1, s10 +; RV32-NEXT: vmv.x.s a1, v24 +; RV32-NEXT: add s8, s7, s8 +; RV32-NEXT: sltu a2, s8, s7 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: vmv.x.s a2, v16 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add s11, s9, s11 -; RV32-NEXT: sltu a1, s11, s9 -; RV32-NEXT: add a1, a1, ra +; RV32-NEXT: vmv.x.s a1, v0 +; RV32-NEXT: add a2, s8, a2 +; RV32-NEXT: sltu a3, a2, s8 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, a0, a1 -; RV32-NEXT: add a0, s11, a2 -; RV32-NEXT: sltu a2, a0, s11 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: sltu a2, a0, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: 
lw s2, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 0(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: explode_16xi64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 40ff8b50d99d8..2ea90203b2103 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -69,9 +69,9 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vmv.v.i v0, 10 +; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; V128-NEXT: vid.v v8 ; V128-NEXT: vsrl.vi v8, v8, 1 -; V128-NEXT: vadd.vi v8, v8, 1 ; V128-NEXT: vmv.v.i v0, 10 +; V128-NEXT: vadd.vi v8, v8, 1 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 ; V128-NEXT: ret @@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu ; V512-NEXT: vid.v v8 ; V512-NEXT: vsrl.vi v8, v8, 1 -; V512-NEXT: vadd.vi v8, v8, 1 ; V512-NEXT: vmv.v.i v0, 10 +; V512-NEXT: vadd.vi v8, v8, 1 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret @@ -426,13 +426,13 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; 
V128-NEXT: vmerge.vvm v24, v8, v24, v0 -; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 ; V128-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: vwaddu.vv v0, v16, v8 ; V128-NEXT: vwmaccu.vx v0, a0, v8 ; V128-NEXT: vmv8r.v v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 0dc72fa1f3b59..32782f1c6045e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -86,8 +86,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vmv.v.i v0, 8 +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -159,22 +159,21 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, 2 -; RV32-NEXT: lui a0, %hi(.LCPI11_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vle16.v v20, (a0) ; RV32-NEXT: li a0, 5 +; RV32-NEXT: lui a1, %hi(.LCPI11_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI11_0) +; RV32-NEXT: vle16.v v20, (a1) ; RV32-NEXT: vslide1down.vx v21, v16, a0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: li a0, 164 ; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vv_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv4r.v v16, v8 ; RV64-NEXT: lui a0, 327683 ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: addi a0, a0, 1 @@ -183,7 +182,7 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmv.v.x v20, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v16, v20 +; RV64-NEXT: vrgatherei16.vv v16, v8, v20 ; RV64-NEXT: li a0, 164 ; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: lui a0, 163841 @@ -191,9 +190,10 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV64-NEXT: addi a0, a0, 1 ; RV64-NEXT: slli a0, a0, 17 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v8, v12, v16, v0.t +; RV64-NEXT: vrgatherei16.vv v16, v12, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> ret <8 x i64> %s @@ -207,13 +207,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 -; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vle16.v v17, (a0) ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t +; RV32-NEXT: vrgatherei16.vv v12, v20, v16 +; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -364,10 
+364,10 @@ define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) { define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -383,10 +383,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: li a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -399,10 +399,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: li a0, 67 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -418,10 +418,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: addi a0, a0, 514 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -437,10 +437,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -460,10 +460,10 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 98 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -663,8 +663,8 @@ define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_slidedown: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: li a0, 195 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -677,10 
+677,10 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: li a0, -22 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -691,13 +691,13 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: li a0, -22 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 635869904832c..79c36a629465d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1100,46 +1100,46 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-LABEL: mulhu_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: lui a1, 3 ; CHECK-NEXT: addi a1, a1, -2044 ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: li a1, -128 -; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: lui a1, 1 ; CHECK-NEXT: addi a2, a1, 32 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: lui a2, %hi(.LCPI65_0) ; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0) ; CHECK-NEXT: vle8.v v11, (a2) -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v11 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v10 -; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: li a2, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vxm v12, v10, a2, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vsrl.vv v8, v9, v8 +; CHECK-NEXT: vmulhu.vv v8, v8, v11 +; CHECK-NEXT: vsub.vv v9, v9, v8 +; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vadd.vv v9, v9, v8 ; CHECK-NEXT: li a2, 513 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 4 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 ; CHECK-NEXT: addi a1, a1, 78 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v9, v9, 3, v0 ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; 
CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 -; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v9, v8 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x @@ -1158,16 +1158,16 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: lui a1, %hi(.LCPI66_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 1 +; CHECK-NEXT: vle16.v v11, (a1) +; CHECK-NEXT: vmv.v.i v12, 1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vslideup.vi v9, v12, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: lui a1, %hi(.LCPI66_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) -; CHECK-NEXT: vle16.v v12, (a1) ; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vmulhu.vv v9, v9, v11 ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 @@ -1176,7 +1176,7 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vslideup.vi v9, v12, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) @@ -1222,18 +1222,18 @@ define void @mulhu_v4i32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vmv.s.x v9, a1 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v10, v9, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI68_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a1) +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmulhu.vv v9, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: vmulhu.vv v8, v8, v11 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: lui a1, 4128 ; CHECK-NEXT: addi a1, a1, 514 @@ -1455,13 +1455,13 @@ define void @mulhs_v2i64(ptr %x) { ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: slli a2, a1, 32 -; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: lui a2, %hi(.LCPI74_0) ; RV64-NEXT: ld a2, %lo(.LCPI74_0)(a2) -; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v9, a2 @@ -3260,49 +3260,47 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: lui a1, 163907 ; CHECK-NEXT: addi a1, a1, -2044 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: li a1, -128 
-; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 ; CHECK-NEXT: lui a1, 66049 ; CHECK-NEXT: addi a1, a1, 32 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: lui a1, %hi(.LCPI181_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI181_0) ; CHECK-NEXT: vle8.v v14, (a1) -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vsrl.vv v10, v8, v10 -; CHECK-NEXT: vmulhu.vv v10, v10, v14 -; CHECK-NEXT: vsub.vv v8, v8, v10 -; CHECK-NEXT: vmulhu.vv v8, v8, v12 -; CHECK-NEXT: vadd.vv v8, v8, v10 -; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v16, v12, a1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 +; CHECK-NEXT: vsrl.vv v8, v10, v8 +; CHECK-NEXT: vmulhu.vv v8, v8, v14 +; CHECK-NEXT: vsub.vv v10, v10, v8 +; CHECK-NEXT: vmulhu.vv v10, v10, v16 +; CHECK-NEXT: vadd.vv v10, v10, v8 ; CHECK-NEXT: lui a1, 8208 ; CHECK-NEXT: addi a1, a1, 513 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 ; CHECK-NEXT: lui a1, 66785 ; CHECK-NEXT: addi a1, a1, 78 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 ; CHECK-NEXT: lui a1, 529160 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 -; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vmerge.vim v12, v12, 3, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v12, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v10, v8 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x @@ -3326,12 +3324,12 @@ define void @mulhu_v16i16(ptr %x) { ; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: lui a1, %hi(.LCPI182_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) ; RV32-NEXT: vle16.v v14, (a1) +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vsext.vf2 v16, v9 ; RV32-NEXT: vsrl.vv v16, v10, v16 ; RV32-NEXT: vmulhu.vv v14, v16, v14 @@ -3361,27 +3359,27 @@ define void @mulhu_v16i16(ptr %x) { ; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: lui a1, 1048568 ; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV64-NEXT: lui a1, %hi(.LCPI182_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) +; RV64-NEXT: vle16.v v12, (a1) ; RV64-NEXT: li a1, 1 ; RV64-NEXT: slli a1, a1, 48 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vmv.v.x v14, a1 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI182_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; RV64-NEXT: vle16.v v14, (a1) -; RV64-NEXT: vsext.vf2 v16, v12 -; RV64-NEXT: vsrl.vv v12, v8, v16 -; RV64-NEXT: vmulhu.vv v12, v12, v14 -; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vmulhu.vv v8, v8, v10 -; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: vsext.vf2 v16, v14 +; 
RV64-NEXT: vsrl.vv v14, v8, v16 +; RV64-NEXT: vmulhu.vv v12, v14, v12 ; RV64-NEXT: lui a1, %hi(.LCPI182_1) ; RV64-NEXT: addi a1, a1, %lo(.LCPI182_1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vlse64.v v10, (a1), zero +; RV64-NEXT: vlse64.v v14, (a1), zero ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vsext.vf2 v12, v10 -; RV64-NEXT: vsrl.vv v8, v8, v12 +; RV64-NEXT: vsub.vv v8, v8, v12 +; RV64-NEXT: vmulhu.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: vsext.vf2 v10, v14 +; RV64-NEXT: vsrl.vv v8, v8, v10 ; RV64-NEXT: vse16.v v8, (a0) ; RV64-NEXT: ret %a = load <16 x i16>, ptr %x @@ -3433,23 +3431,24 @@ define void @mulhu_v4i64(ptr %x) { ; RV32-NEXT: vle32.v v10, (a1) ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vmulhu.vv v10, v8, v10 -; RV32-NEXT: vsub.vv v8, v8, v10 ; RV32-NEXT: lui a1, 524288 ; RV32-NEXT: vmv.s.x v12, a1 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV32-NEXT: vslideup.vi v14, v12, 5 +; RV32-NEXT: lui a1, %hi(.LCPI184_1) +; RV32-NEXT: addi a1, a1, %lo(.LCPI184_1) +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vle8.v v12, (a1) ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v10 ; RV32-NEXT: vmulhu.vv v8, v8, v14 ; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: lui a1, %hi(.LCPI184_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI184_1) ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle8.v v10, (a1) -; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vsext.vf4 v10, v12 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vsrl.vv v8, v8, v12 +; RV32-NEXT: vsrl.vv v8, v8, v10 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -3457,19 +3456,19 @@ define void @mulhu_v4i64(ptr %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: lui a1, %hi(.LCPI184_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) +; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: li a1, -1 ; RV64-NEXT: slli a1, a1, 63 -; RV64-NEXT: vmv.s.x v10, a1 -; RV64-NEXT: vmv.v.i v12, 0 +; RV64-NEXT: vmv.s.x v12, a1 +; RV64-NEXT: vmv.v.i v14, 0 ; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; RV64-NEXT: vslideup.vi v12, v10, 2 -; RV64-NEXT: lui a1, %hi(.LCPI184_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) +; RV64-NEXT: vslideup.vi v14, v12, 2 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: vmulhu.vv v10, v8, v10 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vmulhu.vv v8, v8, v12 +; RV64-NEXT: vmulhu.vv v8, v8, v14 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: lui a1, 12320 ; RV64-NEXT: addi a1, a1, 513 @@ -3488,14 +3487,13 @@ define void @mulhs_v32i8(ptr %x) { ; CHECK-LABEL: mulhs_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v10, 7 ; CHECK-NEXT: lui a1, 304453 ; CHECK-NEXT: addi a1, a1, -1452 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 7 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: li a1, -123 ; CHECK-NEXT: vmv.v.x v12, a1 @@ -3615,19 +3613,19 @@ define void @mulhs_v4i64(ptr %x) { ; ; RV64-LABEL: mulhs_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: addiw a1, a1, 1365 
; RV64-NEXT: slli a2, a1, 32 ; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: vmv.v.x v10, a1 -; RV64-NEXT: lui a1, %hi(.LCPI188_0) -; RV64-NEXT: ld a1, %lo(.LCPI188_0)(a1) +; RV64-NEXT: lui a2, %hi(.LCPI188_0) +; RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.i v0, 5 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 +; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: vmerge.vxm v10, v10, a2, v0 ; RV64-NEXT: vmulh.vv v10, v8, v10 ; RV64-NEXT: lui a1, 1044496 ; RV64-NEXT: addi a1, a1, -256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index eb95d86e34045..82e0760d593c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -7,57 +7,63 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-LABEL: load_large_vector: ; ZVE32X: # %bb.0: -; ZVE32X-NEXT: ld a1, 56(a0) -; ZVE32X-NEXT: ld a2, 32(a0) -; ZVE32X-NEXT: ld a3, 24(a0) -; ZVE32X-NEXT: ld a4, 48(a0) -; ZVE32X-NEXT: ld a5, 8(a0) -; ZVE32X-NEXT: ld a6, 0(a0) -; ZVE32X-NEXT: xor a2, a3, a2 -; ZVE32X-NEXT: snez a2, a2 +; ZVE32X-NEXT: ld a1, 80(a0) +; ZVE32X-NEXT: ld a2, 72(a0) +; ZVE32X-NEXT: ld a3, 56(a0) +; ZVE32X-NEXT: ld a4, 32(a0) +; ZVE32X-NEXT: ld a5, 24(a0) +; ZVE32X-NEXT: ld a6, 48(a0) +; ZVE32X-NEXT: ld a7, 8(a0) +; ZVE32X-NEXT: ld a0, 0(a0) +; ZVE32X-NEXT: xor a4, a5, a4 +; ZVE32X-NEXT: snez a4, a4 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vmv.s.x v8, a2 +; ZVE32X-NEXT: vmv.s.x v8, a4 ; ZVE32X-NEXT: vand.vi v8, v8, 1 ; ZVE32X-NEXT: vmsne.vi v0, v8, 0 -; ZVE32X-NEXT: vmv.s.x v8, zero -; ZVE32X-NEXT: vmerge.vim v9, v8, 1, v0 -; ZVE32X-NEXT: xor a2, a6, a5 -; ZVE32X-NEXT: snez a2, a2 -; ZVE32X-NEXT: vmv.s.x v10, a2 +; ZVE32X-NEXT: vmv.s.x v9, zero +; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 +; ZVE32X-NEXT: xor a0, a0, a7 +; ZVE32X-NEXT: snez a0, a0 +; ZVE32X-NEXT: vmv.s.x v10, a0 ; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmv.v.i v10, 0 ; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v11, v9, 1 +; ZVE32X-NEXT: vslideup.vi v11, v8, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v11, 0 -; ZVE32X-NEXT: ld a2, 80(a0) -; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 -; ZVE32X-NEXT: xor a1, a4, a1 -; ZVE32X-NEXT: snez a1, a1 -; ZVE32X-NEXT: vmv.s.x v11, a1 +; ZVE32X-NEXT: xor a0, a6, a3 +; ZVE32X-NEXT: snez a0, a0 +; ZVE32X-NEXT: vmv.s.x v8, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v11, v11, 1 -; ZVE32X-NEXT: vmsne.vi v0, v11, 0 -; ZVE32X-NEXT: ld a0, 72(a0) -; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 +; ZVE32X-NEXT: vand.vi v8, v8, 1 +; ZVE32X-NEXT: vmsne.vi v8, v8, 0 +; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v11, v10, 1, v0 +; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v9, v11, 2 +; ZVE32X-NEXT: vslideup.vi v11, v8, 2 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v9, 0 -; ZVE32X-NEXT: vmerge.vim v9, v10, 1, v0 -; 
ZVE32X-NEXT: xor a0, a0, a2 -; ZVE32X-NEXT: snez a0, a0 -; ZVE32X-NEXT: vmv.s.x v10, a0 +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: xor a1, a2, a1 +; ZVE32X-NEXT: snez a0, a1 +; ZVE32X-NEXT: vmv.s.x v8, a0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v10, v10, 1 -; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVE32X-NEXT: vand.vi v8, v8, 1 +; ZVE32X-NEXT: vmsne.vi v8, v8, 0 +; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v10, v10, 1, v0 +; ZVE32X-NEXT: vmv1r.v v0, v8 +; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vslideup.vi v9, v8, 3 -; ZVE32X-NEXT: vmsne.vi v0, v9, 0 +; ZVE32X-NEXT: vslideup.vi v10, v8, 3 +; ZVE32X-NEXT: vmsne.vi v0, v10, 0 ; ZVE32X-NEXT: ret ; ; ZVE64X-LABEL: load_large_vector: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 99364264de829..178a920169ad9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -159,16 +159,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 54 +; RV32-NEXT: li a3, 82 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x36, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 54 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 21 +; RV32-NEXT: li a4, 57 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 @@ -177,30 +177,27 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 3 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: li a5, 41 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v1, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 37 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a5, a4, 6 +; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 4 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: li a5, 45 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill @@ -209,391 +206,429 @@ define {<8 x i64>, <8 x i64>, <8 x 
i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 13 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a5, a4, 5 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, %hi(.LCPI6_1) +; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1) +; RV32-NEXT: lui a5, 1 +; RV32-NEXT: vle16.v v8, (a4) +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a6, 25 +; RV32-NEXT: mul a4, a4, a6 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 45 +; RV32-NEXT: li a4, 73 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI6_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_1) -; RV32-NEXT: lui a4, 1 -; RV32-NEXT: addi a4, a4, -64 -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a5, a1, 2 -; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: li a3, 49 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v16, (a3) +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, a5, -64 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v2, a4 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vrgatherei16.vv v16, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 25 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t -; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v12, v8 +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 45 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vs4r.v v8, (a1) # 
Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv4r.v v16, v8 -; RV32-NEXT: vslideup.vi v8, v16, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vslideup.vi v12, v8, 2 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v8, v16, 8, v0.t -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vslideup.vi v12, v16, 8, v0.t +; RV32-NEXT: vmv.v.v v20, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: lui a3, %hi(.LCPI6_3) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) +; RV32-NEXT: lui a4, %hi(.LCPI6_4) +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: vle16.v v16, (a3) +; RV32-NEXT: addi a1, a4, %lo(.LCPI6_4) +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vle16.v v2, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI6_3) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_3) -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vrgatherei16.vv v24, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v8, v16, v0.t +; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v20, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v24, v2 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, 
a1, 2 +; RV32-NEXT: slli a3, a1, 6 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t -; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v16, v8, 6, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: slli a3, a1, 5 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI6_4) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v12, v24, v8 -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) +; RV32-NEXT: lui a3, %hi(.LCPI6_6) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v24, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_6) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6) -; RV32-NEXT: li a3, 960 -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v16, (a1) +; RV32-NEXT: vle16.v v4, (a3) +; RV32-NEXT: li a1, 960 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 13 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vrgatherei16.vv v8, v24, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v12, v8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: lui a3, %hi(.LCPI6_8) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8) +; RV32-NEXT: lui a4, %hi(.LCPI6_9) +; RV32-NEXT: vsetivli zero, 
16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: addi a1, a4, %lo(.LCPI6_9) +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vle16.v v24, (a3) +; RV32-NEXT: vle16.v v28, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v4, v16, v8 -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v4, v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t -; RV32-NEXT: lui a1, %hi(.LCPI6_8) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v0, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_9) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9) -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 13 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v8, 6 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: lui a1, 15 -; RV32-NEXT: vmv.s.x v24, a1 -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv.s.x v3, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 57 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # 
Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v16, 6 +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 6 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv.v.v v28, v12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 57 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) +; RV32-NEXT: lui a3, %hi(.LCPI6_12) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v0, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_12) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12) -; RV32-NEXT: li a3, 1008 -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: vmv.s.x v25, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vle16.v v12, (a3) +; RV32-NEXT: li a1, 1008 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v0 -; RV32-NEXT: vmv1r.v v0, v25 +; RV32-NEXT: vrgatherei16.vv v24, v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 29 +; RV32-NEXT: li a3, 49 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v28, v8 +; RV32-NEXT: vrgatherei16.vv v24, v16, v12, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: lui a3, %hi(.LCPI6_14) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14) +; RV32-NEXT: lui a4, %hi(.LCPI6_15) +; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV32-NEXT: vle16.v v20, (a1) +; RV32-NEXT: addi a1, a4, %lo(.LCPI6_15) +; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma +; RV32-NEXT: vle16.v v24, (a3) ; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 41 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: slli a3, a1, 6 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, 
e32, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v8, v20, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: slli a3, a1, 5 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 25 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t -; RV32-NEXT: lui a1, %hi(.LCPI6_14) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14) +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 73 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI6_15) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15) -; RV32-NEXT: vle16.v v28, (a1) +; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 49 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 29 +; RV32-NEXT: li a2, 21 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v24, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 57 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.v.v v28, v0 +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v24, (a1) +; RV32-NEXT: vse32.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v28, (a1) ; RV32-NEXT: addi a1, a0, 192 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: 
vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 2 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 4 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 54 +; RV32-NEXT: li a1, 82 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 @@ -604,372 +639,422 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 74 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xca, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 74 * vlenb ; RV64-NEXT: addi a2, a1, 256 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: li a3, 25 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 40 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 6 +; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v8, v16, 4 +; RV64-NEXT: vrgather.vi v12, v16, 4 ; RV64-NEXT: li a1, 128 -; RV64-NEXT: vmv.s.x v4, a1 +; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v16, v16, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: li a3, 49 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vmv1r.v v0, v4 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t -; RV64-NEXT: 
vmv.v.v v20, v8 +; RV64-NEXT: vrgather.vi v12, v16, 2, v0.t ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vid.v v10 ; RV64-NEXT: li a1, 6 -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vmul.vx v6, v8, a1 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v24, v6 +; RV64-NEXT: vmul.vx v2, v10, a1 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vle64.v v16, (a2) +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 57 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v7, a1 +; RV64-NEXT: vadd.vi v10, v2, -16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: li a1, 56 -; RV64-NEXT: vmv.s.x v5, a1 -; RV64-NEXT: vadd.vi v16, v6, -16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: vrgatherei16.vv v16, v24, v2 +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV64-NEXT: vrgatherei16.vv v16, v24, v10, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v8 +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v24, v16, 5 -; RV64-NEXT: vmv1r.v v0, v4 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v12, v16, 5 +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vmv1r.v v6, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t +; RV64-NEXT: vrgather.vi v12, v16, 3, v0.t +; RV64-NEXT: vmv.v.v v28, v12 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v6, 1 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v24, v2, 1 +; RV64-NEXT: vadd.vi v26, v2, -15 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v6, -15 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: vrgatherei16.vv v16, v8, 
v24 +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v8, v26, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v24, v8 +; RV64-NEXT: vmv.v.v v28, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addi a1, a1, 7 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 6 +; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vmv2r.v v26, v6 -; RV64-NEXT: vadd.vi v24, v6, 2 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vrgatherei16.vv v12, v16, v9 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v0, v24 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgatherei16.vv v12, v16, v10 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv4r.v v8, v16 +; RV64-NEXT: vrgather.vi v12, v16, 2 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v12, v16, 3 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 24 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.s.x v1, a1 +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v24, v2, 2 +; RV64-NEXT: vadd.vi v4, v2, -14 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v24, v26, -14 -; RV64-NEXT: vmv2r.v v6, v26 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vrgatherei16.vv v8, v16, v24, v0.t -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.i v12, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgatherei16.vv v8, v16, v24 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 57 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 
16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v20, v24, v12 +; RV64-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v24, 4, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v8 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vmv2r.v v10, v6 +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v20, v16, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 6 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v6, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v6, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v4, v2, 3 +; RV64-NEXT: vadd.vi v8, v2, -13 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v0, v8 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v10, -13 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vrgatherei16.vv v8, v16, v4 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v8, v28, v0.t -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addi a1, a1, 7 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v8, v0 -; RV64-NEXT: vrgatherei16.vv v20, v0, v12 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: 
vmv1r.v v0, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: lui a1, 96 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: li a1, 192 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vrgather.vi v8, v24, 5, v0.t ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v28, v8, 2 -; RV64-NEXT: vrgatherei16.vv v28, v24, v12, v0.t -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 96 +; RV64-NEXT: li a2, 192 +; RV64-NEXT: vmv.s.x v28, a2 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vmv1r.v v0, v28 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 6 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v24, 4 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgatherei16.vv v12, v24, v8, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v0, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 28 ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v26, v24, -12 +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v30, v2, 4 +; RV64-NEXT: vadd.vi v6, v2, -12 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vrgatherei16.vv v16, v8, v30 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v28, v8 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v8, v6, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size 
Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 112 ; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vmv1r.v v0, v28 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v16, v24 +; RV64-NEXT: vmv2r.v v8, v2 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v24, 5 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v12, v2, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v0, v12 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v24, v0, v12 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v24, -11 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vadd.vi v2, v8, -11 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; RV64-NEXT: vrgatherei16.vv v24, v8, v2, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vmv.v.v v12, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: 
add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v20, v0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v20, (a1) ; RV64-NEXT: addi a1, a0, 192 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 20 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 12 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 56 +; RV64-NEXT: li a1, 74 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index c37782ba60d01..9463267d0b0e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -57,17 +57,17 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -118,50 +118,50 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; 
RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -224,50 +224,50 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, 
a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -328,57 +328,57 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 68(sp) ; RV32-NEXT: sw a0, 64(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 124(sp) ; RV32-NEXT: sw a0, 120(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 6 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 116(sp) ; RV32-NEXT: sw a0, 112(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 5 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 108(sp) ; RV32-NEXT: sw a0, 104(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 100(sp) ; RV32-NEXT: sw a0, 96(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 92(sp) ; RV32-NEXT: sw a0, 88(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 84(sp) ; RV32-NEXT: sw a0, 80(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 192 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf @@ -502,64 +502,64 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 196(sp) ; RV32-NEXT: sw a0, 192(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: sw a0, 128(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: sw a0, 152(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 148(sp) ; RV32-NEXT: sw a0, 144(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: sw a0, 136(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 188(sp) ; RV32-NEXT: sw a0, 184(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 6 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 180(sp) ; RV32-NEXT: sw a0, 176(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 5 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf ; RV32-NEXT: sw a1, 172(sp) ; RV32-NEXT: sw a0, 168(sp) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: addi a0, sp, 384 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrintf @@ -711,17 +711,17 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -771,50 +771,50 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb @@ -885,32 +885,32 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 164(sp) ; RV32-NEXT: sw a0, 160(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 132(sp) ; RV32-NEXT: sw a0, 128(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 140(sp) ; RV32-NEXT: sw a0, 136(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint ; RV32-NEXT: sw a1, 156(sp) ; RV32-NEXT: sw a0, 152(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: addi a0, sp, 256 ; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa0, v8 ; RV32-NEXT: call llrint diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index dad7524ab04db..9b0944e7e2f72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -774,30 +774,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV32-NEXT: fcvt.w.d a0, fa5 ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: fcvt.w.d a1, fa5 -; RV32-NEXT: vsetivli zero, 8, e32, 
m2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a1
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 2
-; RV32-NEXT: vfmv.f.s fa5, v12
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v10, a0
 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.d a2, fa5
 ; RV32-NEXT: vslidedown.vi v8, v8, 3
 ; RV32-NEXT: fld fa5, 32(sp)
 ; RV32-NEXT: vfmv.f.s fa4, v8
 ; RV32-NEXT: fld fa3, 40(sp)
-; RV32-NEXT: fcvt.w.d a0, fa4
-; RV32-NEXT: fcvt.w.d a1, fa5
-; RV32-NEXT: fld fa5, 48(sp)
-; RV32-NEXT: fcvt.w.d a2, fa3
+; RV32-NEXT: fcvt.w.d a3, fa4
+; RV32-NEXT: fcvt.w.d a4, fa5
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v10, a0
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: fcvt.w.d a1, fa3
+; RV32-NEXT: fld fa5, 48(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
 ; RV32-NEXT: fcvt.w.d a0, fa5
 ; RV32-NEXT: fld fa5, 56(sp)
+; RV32-NEXT: vslide1down.vx v8, v8, a4
 ; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
 ; RV32-NEXT: vslide1down.vx v8, v8, a0
 ; RV32-NEXT: fcvt.w.d a0, fa5
 ; RV32-NEXT: vslide1down.vx v8, v8, a0
@@ -827,30 +824,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; RV64-i32-NEXT: fcvt.l.d a0, fa5
 ; RV64-i32-NEXT: vfmv.f.s fa5, v8
 ; RV64-i32-NEXT: fcvt.l.d a1, fa5
-; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-i32-NEXT: vmv.v.x v10, a1
-; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
-; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-i32-NEXT: vslidedown.vi v12, v8, 2
-; RV64-i32-NEXT: vfmv.f.s fa5, v12
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v10, v10, a0
 ; RV64-i32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.d a2, fa5
 ; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
 ; RV64-i32-NEXT: fld fa5, 32(sp)
 ; RV64-i32-NEXT: vfmv.f.s fa4, v8
 ; RV64-i32-NEXT: fld fa3, 40(sp)
-; RV64-i32-NEXT: fcvt.l.d a0, fa4
-; RV64-i32-NEXT: fcvt.l.d a1, fa5
-; RV64-i32-NEXT: fld fa5, 48(sp)
-; RV64-i32-NEXT: fcvt.l.d a2, fa3
+; RV64-i32-NEXT: fcvt.l.d a3, fa4
+; RV64-i32-NEXT: fcvt.l.d a4, fa5
 ; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-i32-NEXT: vslide1down.vx v8, v10, a0
+; RV64-i32-NEXT: vmv.v.x v8, a1
+; RV64-i32-NEXT: fcvt.l.d a1, fa3
+; RV64-i32-NEXT: fld fa5, 48(sp)
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a2
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a3
 ; RV64-i32-NEXT: fcvt.l.d a0, fa5
 ; RV64-i32-NEXT: fld fa5, 56(sp)
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a4
 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a2
 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
 ; RV64-i32-NEXT: fcvt.l.d a0, fa5
 ; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 023d707f07bff..1748315186936 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -245,8 +245,8 @@ define <8 x i1> @buildvec_mask_v8i1() {
 define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: buildvec_mask_nonconst_v8i1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: li a2, 19
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv.s.x v0, a2
 ; CHECK-NEXT: vmv.v.x v8, a1
 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
@@ -256,8 +256,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
 ;
 ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1:
 ; ZVE32F: # %bb.0:
-; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; ZVE32F-NEXT: li a2, 19
+; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; ZVE32F-NEXT: vmv.s.x v0, a2
 ; ZVE32F-NEXT: vmv.v.x v8, a1
 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
@@ -286,8 +286,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
 ; CHECK-NEXT: vslide1down.vx v9, v9, a1
 ; CHECK-NEXT: vslide1down.vx v8, v8, a3
 ; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
 ; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 1
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -303,8 +303,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
 ; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; ZVE32F-NEXT: vand.vi v8, v8, 1
 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -331,8 +331,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
 ; CHECK-NEXT: vslide1down.vx v9, v9, a1
 ; CHECK-NEXT: vslide1down.vx v8, v8, a3
 ; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
 ; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 1
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -348,8 +348,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
 ; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; ZVE32F-NEXT: vand.vi v8, v8, 1
 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -375,8 +375,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
 ; CHECK-NEXT: vslide1down.vx v9, v9, a1
 ; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; CHECK-NEXT: vand.vi v8, v8, 1
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -391,8 +391,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; ZVE32F-NEXT: vand.vi v8, v8, 1
 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 7fc442c88d101..979785dd2c024 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -24,11 +24,11 @@ define void @splat_zeros_v2i1(ptr %x) { define void @splat_v1i1(ptr %x, i1 %y) { ; CHECK-LABEL: splat_v1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: andi a1, a1, 1 -; CHECK-NEXT: vmv.s.x v9, a1 -; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 0d2d6696fce25..db0969c85a8e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -520,16 +520,16 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: lbu a1, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vlse8.v v8, (a2), zero -; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a3) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru) ret <4 x i8> %v @@ -711,8 +711,8 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: .LBB12_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -734,8 +734,8 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB12_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1209,16 +1209,16 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) { ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: lh a1, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a3) +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x i16> 
@llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru) ret <4 x i16> %v @@ -1405,8 +1405,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB23_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1430,8 +1430,8 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1556,8 +1556,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1581,8 +1581,8 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1708,8 +1708,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1734,8 +1734,8 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -1863,8 +1863,8 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: .LBB26_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez 
a2, .LBB26_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -1887,8 +1887,8 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB26_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2258,16 +2258,16 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: lw a1, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a3) +; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru) ret <4 x i32> %v @@ -2453,8 +2453,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2478,8 +2478,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2603,8 +2603,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2628,8 +2628,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2757,8 +2757,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; 
RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2783,8 +2783,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -2915,8 +2915,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -2940,8 +2940,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -3066,8 +3066,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -3091,8 +3091,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -3221,8 +3221,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -3247,8 +3247,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -3375,8 +3375,8 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: .LBB41_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -3399,8 +3399,8 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -4088,13 +4088,13 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB48_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB48_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -4251,8 +4251,8 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB48_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB48_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -4362,13 +4362,13 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB49_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB49_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -4525,8 +4525,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB49_5: # %else2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB49_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -4638,13 +4638,13 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB50_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB50_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -4803,8 +4803,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB50_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB50_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -4921,13 +4921,13 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB51_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5085,8 +5085,8 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB51_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB51_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -5196,13 +5196,13 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB52_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: 
beqz a3, .LBB52_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5360,8 +5360,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB52_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB52_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -5473,13 +5473,13 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB53_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB53_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5641,8 +5641,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB53_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a7, a6, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a7, .LBB53_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -5757,13 +5757,13 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB54_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB54_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -5921,8 +5921,8 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB54_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB54_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -6030,13 +6030,13 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: 
vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB55_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB55_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -6194,8 +6194,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: .LBB55_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB55_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -6304,13 +6304,13 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB56_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a3, .LBB56_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) @@ -6470,8 +6470,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: .LBB56_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a6, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: beqz a6, .LBB56_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 @@ -6601,13 +6601,13 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a1, t0, 1 -; RV32ZVE32F-NEXT: beqz a1, .LBB57_7 -; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: andi a2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: beqz a2, .LBB57_7 +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw a1, 4(a2) ; RV32ZVE32F-NEXT: lw a2, 0(a2) @@ -7016,13 +7016,13 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: flh fa4, 0(a0) -; RV64ZVE32F-NEXT: flh fa3, 0(a3) +; RV64ZVE32F-NEXT: flh fa4, 
0(a3) +; RV64ZVE32F-NEXT: flh fa3, 0(a0) ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 @@ -7212,8 +7212,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7237,8 +7237,8 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7363,8 +7363,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7388,8 +7388,8 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7515,8 +7515,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7541,8 +7541,8 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7670,8 +7670,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: .LBB67_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, 
ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -7694,8 +7694,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB67_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -7939,13 +7939,13 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 8(a0) ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a0, 16(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero -; RV64ZVE32F-NEXT: flw fa4, 0(a0) -; RV64ZVE32F-NEXT: flw fa3, 0(a3) +; RV64ZVE32F-NEXT: flw fa4, 0(a3) +; RV64ZVE32F-NEXT: flw fa3, 0(a0) ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3 @@ -8134,8 +8134,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8159,8 +8159,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8284,8 +8284,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8309,8 +8309,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8438,8 +8438,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8464,8 +8464,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8596,8 +8596,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8621,8 +8621,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8747,8 +8747,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8772,8 +8772,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -8902,8 +8902,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -8928,8 +8928,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14 -; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -9056,8 +9056,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: .LBB80_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -9080,8 +9080,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB80_9: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else17 @@ -9660,31 +9660,32 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB87_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_11 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_12 ; RV32ZVE32F-NEXT: .LBB87_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_13 ; RV32ZVE32F-NEXT: .LBB87_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_14 ; RV32ZVE32F-NEXT: .LBB87_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_15 ; RV32ZVE32F-NEXT: .LBB87_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB87_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB87_16 ; RV32ZVE32F-NEXT: .LBB87_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_9 ; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9702,52 +9703,51 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: 
vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_4 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_5 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_6 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB87_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB87_7 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_8 ; RV32ZVE32F-NEXT: j .LBB87_9 ; @@ -9775,8 +9775,8 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB87_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB87_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -9875,31 +9875,32 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; 
RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB88_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_11 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_12 ; RV32ZVE32F-NEXT: .LBB88_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_13 ; RV32ZVE32F-NEXT: .LBB88_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_14 ; RV32ZVE32F-NEXT: .LBB88_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_15 ; RV32ZVE32F-NEXT: .LBB88_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB88_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB88_16 ; RV32ZVE32F-NEXT: .LBB88_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_9 ; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9917,52 +9918,51 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_3 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_4 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; 
RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_5 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_6 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB88_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB88_7 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_8 ; RV32ZVE32F-NEXT: j .LBB88_9 ; @@ -9990,8 +9990,8 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB88_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB88_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10092,31 +10092,32 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_10 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_11 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB89_10 +; RV32ZVE32F-NEXT: # %bb.1: # %else +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_11 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_12 ; RV32ZVE32F-NEXT: .LBB89_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_13 ; RV32ZVE32F-NEXT: .LBB89_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_14 ; RV32ZVE32F-NEXT: .LBB89_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_15 ; RV32ZVE32F-NEXT: .LBB89_6: # %else14 -; 
RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB89_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB89_16 ; RV32ZVE32F-NEXT: .LBB89_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_9 ; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10134,52 +10135,51 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_3 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_4 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_5 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_6 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB89_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB89_7 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_8 ; RV32ZVE32F-NEXT: j .LBB89_9 ; @@ -10209,8 +10209,8 @@ 
define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10316,31 +10316,32 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB90_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 ; RV32ZVE32F-NEXT: .LBB90_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 ; RV32ZVE32F-NEXT: .LBB90_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 ; RV32ZVE32F-NEXT: .LBB90_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 ; RV32ZVE32F-NEXT: .LBB90_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB90_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_16 ; RV32ZVE32F-NEXT: .LBB90_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_9 ; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10358,52 +10359,51 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, 
ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB90_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_8 ; RV32ZVE32F-NEXT: j .LBB90_9 ; @@ -10432,8 +10432,8 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB90_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB90_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10532,31 +10532,32 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB91_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_11 ; RV32ZVE32F-NEXT: .LBB91_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; 
RV32ZVE32F-NEXT: bnez a1, .LBB91_12 ; RV32ZVE32F-NEXT: .LBB91_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_13 ; RV32ZVE32F-NEXT: .LBB91_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_14 ; RV32ZVE32F-NEXT: .LBB91_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_15 ; RV32ZVE32F-NEXT: .LBB91_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB91_16 ; RV32ZVE32F-NEXT: .LBB91_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB91_9 ; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10574,52 +10575,51 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_2 ; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_3 ; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_4 ; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_5 ; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_6 ; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 
64 -; RV32ZVE32F-NEXT: beqz a2, .LBB91_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB91_7 ; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB91_8 ; RV32ZVE32F-NEXT: j .LBB91_9 ; @@ -10648,8 +10648,8 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB91_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10750,31 +10750,32 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB92_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_11 ; RV32ZVE32F-NEXT: .LBB92_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_12 ; RV32ZVE32F-NEXT: .LBB92_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_13 ; RV32ZVE32F-NEXT: .LBB92_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_14 ; RV32ZVE32F-NEXT: .LBB92_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_15 ; RV32ZVE32F-NEXT: .LBB92_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB92_16 ; RV32ZVE32F-NEXT: .LBB92_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB92_9 ; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10792,52 +10793,51 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_2 +; 
RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_2 ; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_3 ; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_4 ; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_5 ; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_6 ; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB92_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB92_7 ; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB92_8 ; RV32ZVE32F-NEXT: j .LBB92_9 ; @@ -10870,8 +10870,8 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a4, a3, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB92_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -10975,31 +10975,32 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; 
RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB93_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_11 ; RV32ZVE32F-NEXT: .LBB93_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_12 ; RV32ZVE32F-NEXT: .LBB93_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_13 ; RV32ZVE32F-NEXT: .LBB93_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_14 ; RV32ZVE32F-NEXT: .LBB93_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_15 ; RV32ZVE32F-NEXT: .LBB93_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB93_16 ; RV32ZVE32F-NEXT: .LBB93_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB93_9 ; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11017,52 +11018,51 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_2 ; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_3 ; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_4 ; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_5 ; RV32ZVE32F-NEXT: .LBB93_14: # 
%cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_6 ; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB93_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB93_7 ; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB93_8 ; RV32ZVE32F-NEXT: j .LBB93_9 ; @@ -11091,8 +11091,8 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB93_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11189,31 +11189,32 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB94_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_11 ; RV32ZVE32F-NEXT: .LBB94_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_12 ; RV32ZVE32F-NEXT: .LBB94_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_13 ; RV32ZVE32F-NEXT: .LBB94_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_14 ; RV32ZVE32F-NEXT: .LBB94_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_15 ; RV32ZVE32F-NEXT: .LBB94_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_16 ; RV32ZVE32F-NEXT: .LBB94_7: # %else17 -; 
RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB94_9 ; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11231,52 +11232,51 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_2 ; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_3 ; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_4 ; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_5 ; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_6 ; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB94_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB94_7 ; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB94_8 ; RV32ZVE32F-NEXT: j .LBB94_9 ; @@ -11305,8 +11305,8 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB94_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: 
vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB94_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11404,31 +11404,32 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB95_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_11 ; RV32ZVE32F-NEXT: .LBB95_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_12 ; RV32ZVE32F-NEXT: .LBB95_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_13 ; RV32ZVE32F-NEXT: .LBB95_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_14 ; RV32ZVE32F-NEXT: .LBB95_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_15 ; RV32ZVE32F-NEXT: .LBB95_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_16 ; RV32ZVE32F-NEXT: .LBB95_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB95_9 ; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11446,52 +11447,51 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_2 ; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_3 ; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_4 +; 
RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_4 ; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_5 ; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_6 ; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB95_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB95_7 ; RV32ZVE32F-NEXT: .LBB95_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB95_8 ; RV32ZVE32F-NEXT: j .LBB95_9 ; @@ -11522,8 +11522,8 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB95_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB95_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11642,31 +11642,32 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi a3, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez a3, .LBB96_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_11 +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_11 ; RV32ZVE32F-NEXT: .LBB96_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_12 +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_12 ; RV32ZVE32F-NEXT: .LBB96_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_13 +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: bnez a1, 
.LBB96_13 ; RV32ZVE32F-NEXT: .LBB96_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_14 +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_14 ; RV32ZVE32F-NEXT: .LBB96_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_15 +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_15 ; RV32ZVE32F-NEXT: .LBB96_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_16 +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_16 ; RV32ZVE32F-NEXT: .LBB96_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: beqz a1, .LBB96_9 ; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -11684,52 +11685,51 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: fld fa0, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_2 +; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: fld fa0, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 2 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_2 ; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_3 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa1, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 4 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_3 ; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_4 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa2, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 8 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_4 ; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_5 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa3, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 16 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_5 ; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa4, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_6 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa4, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 32 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_6 ; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB96_7 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa5, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, 64 +; RV32ZVE32F-NEXT: beqz a1, .LBB96_7 ; RV32ZVE32F-NEXT: 
.LBB96_16: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: fld fa6, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 +; RV32ZVE32F-NEXT: fld fa6, 0(a1) +; RV32ZVE32F-NEXT: andi a1, a2, -128 ; RV32ZVE32F-NEXT: bnez a1, .LBB96_8 ; RV32ZVE32F-NEXT: j .LBB96_9 ; @@ -11878,8 +11878,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB97_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -11896,8 +11896,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4 ; RV64ZVE32F-NEXT: .LBB97_8: # %else11 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 @@ -11910,8 +11910,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5 ; RV64ZVE32F-NEXT: .LBB97_10: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 @@ -11935,8 +11935,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB97_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else29 @@ -11958,8 +11958,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13 ; RV64ZVE32F-NEXT: .LBB97_20: # %else38 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB97_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40 @@ -12088,22 +12088,22 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vmv1r.v v12, v10 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v10, 16 +; RV64V-NEXT: vslidedown.vi v12, v10, 16 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: 
vslidedown.vi v8, v8, 16 -; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64V-NEXT: vslidedown.vi v0, v0, 2 -; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64V-NEXT: vslideup.vi v12, v10, 16 -; RV64V-NEXT: vmv.v.v v8, v12 +; RV64V-NEXT: vslideup.vi v10, v12, 16 +; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: @@ -12135,8 +12135,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 @@ -12153,8 +12153,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: .LBB98_8: # %else11 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 @@ -12167,8 +12167,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 ; RV64ZVE32F-NEXT: .LBB98_10: # %else14 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 @@ -12192,8 +12192,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 @@ -12216,8 +12216,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 51 +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 @@ -12240,8 +12240,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, 
ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else41 @@ -12265,8 +12265,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_28: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 45 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else53 @@ -12283,8 +12283,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20 ; RV64ZVE32F-NEXT: .LBB98_32: # %else59 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 42 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61 @@ -12297,8 +12297,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21 ; RV64ZVE32F-NEXT: .LBB98_34: # %else62 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 41 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else65 @@ -12322,8 +12322,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_39: # %else74 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 37 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else77 @@ -12345,8 +12345,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 ; RV64ZVE32F-NEXT: .LBB98_44: # %else86 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 33 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88 @@ -12636,11 +12636,10 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV64V-LABEL: mgather_narrow_edge_case: ; RV64V: # %bb.0: ; RV64V-NEXT: li a1, -512 -; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64V-NEXT: vmv.v.i v0, 5 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vmerge.vim v10, v8, 0, v0 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64V-NEXT: vluxei64.v v8, (a0), v10 @@ -12724,8 +12723,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vslide1down.vx v8, v8, a7 ; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v8, v8, a7 
; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; @@ -12799,8 +12798,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: vmv.v.x v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-NEXT: vslide1down.vx v8, v8, a6 -; RV64V-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-NEXT: vmv.v.i v0, 15 +; RV64V-NEXT: vslide1down.vx v8, v8, a7 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64V-NEXT: addi sp, s0, -128 ; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload @@ -12850,8 +12849,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12892,8 +12891,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12937,8 +12936,8 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -12982,8 +12981,8 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13027,8 +13026,8 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13070,8 +13069,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13116,8 +13115,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; 
RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13163,8 +13162,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13213,8 +13212,8 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13260,8 +13259,8 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13316,8 +13315,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13363,8 +13362,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index 4bbda2152a6f9..ad075e4b4e198 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -401,14 +401,14 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi a3, a1, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: vle64.v v0, (a1) +; RV32-NEXT: vle64.v v24, (a3) ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v0, v24 -; RV32-NEXT: vmseq.vv v0, v16, v24 +; RV32-NEXT: vmseq.vv v8, v0, v16 +; RV32-NEXT: vmseq.vv v0, v24, v16 ; RV32-NEXT: 
addi a1, a0, 128 ; RV32-NEXT: vle64.v v16, (a1), v0.t ; RV32-NEXT: vmv1r.v v0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 96297e3a11bc2..e6852c1b57510 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -335,18 +335,18 @@ define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vse8.v v8, (a2) +; RV64ZVE32F-NEXT: vse8.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse8.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse8.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse8.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse8.v v8, (a1) +; RV64ZVE32F-NEXT: vse8.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1)) ret void @@ -504,8 +504,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB9_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -526,8 +526,8 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB9_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -689,11 +689,11 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 ; ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 ; RV64ZVE32F-NEXT: bnez a3, .LBB12_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 @@ -747,13 +747,14 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a1 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, 
e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 ; RV64ZVE32F-NEXT: andi a1, a0, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: bnez a1, .LBB13_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 @@ -852,18 +853,18 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void @@ -1025,8 +1026,8 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1048,8 +1049,8 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB18_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1158,8 +1159,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB19_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1181,8 +1182,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB19_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1292,8 +1293,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB20_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1316,8 +1317,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB20_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1430,8 +1431,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB21_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1453,8 +1454,8 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB21_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -1625,11 +1626,12 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.v.x v8, a0 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 -; RV64ZVE32F-NEXT: andi a1, a0, 1 -; RV64ZVE32F-NEXT: bnez a1, .LBB24_3 +; RV64ZVE32F-NEXT: andi a4, a0, 1 +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: bnez a4, .LBB24_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4 @@ -1727,18 +1729,18 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a2) +; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a1) +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void @@ -1903,8 +1905,8 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 
x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -1927,8 +1929,8 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB29_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2040,8 +2042,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2064,8 +2066,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB30_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2181,8 +2183,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB31_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2206,8 +2208,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB31_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2326,8 +2328,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB32_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2350,8 +2352,8 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: 
vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB32_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2464,8 +2466,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB33_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2488,8 +2490,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB33_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2606,8 +2608,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB34_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2631,8 +2633,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: .LBB34_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -2748,8 +2750,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -2772,8 +2774,8 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB35_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -3080,17 +3082,17 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV32ZVE32F-NEXT: lw a3, 20(a0) ; RV32ZVE32F-NEXT: lw a4, 16(a0) ; 
RV32ZVE32F-NEXT: lw a5, 12(a0) -; RV32ZVE32F-NEXT: lw a6, 8(a0) -; RV32ZVE32F-NEXT: lw a7, 0(a0) -; RV32ZVE32F-NEXT: lw a0, 4(a0) +; RV32ZVE32F-NEXT: lw a6, 0(a0) +; RV32ZVE32F-NEXT: lw a7, 4(a0) +; RV32ZVE32F-NEXT: lw a0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v8 -; RV32ZVE32F-NEXT: sw a7, 0(t0) -; RV32ZVE32F-NEXT: sw a0, 4(t0) +; RV32ZVE32F-NEXT: sw a6, 0(t0) +; RV32ZVE32F-NEXT: sw a7, 4(t0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a6, 0(a0) -; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: vmv.x.s a6, v9 +; RV32ZVE32F-NEXT: sw a0, 0(a6) +; RV32ZVE32F-NEXT: sw a5, 4(a6) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 ; RV32ZVE32F-NEXT: sw a4, 0(a0) @@ -3389,42 +3391,43 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB42_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB42_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_11 ; RV32ZVE32F-NEXT: .LBB42_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_12 ; RV32ZVE32F-NEXT: .LBB42_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_13 ; RV32ZVE32F-NEXT: .LBB42_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_14 ; RV32ZVE32F-NEXT: .LBB42_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_15 ; RV32ZVE32F-NEXT: .LBB42_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_16 ; RV32ZVE32F-NEXT: .LBB42_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_9 ; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3439,45 +3442,44 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, 
zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_2 ; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_3 ; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_4 ; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_5 ; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_6 ; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3485,7 +3487,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_7 ; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3493,7 +3495,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_8 ; RV32ZVE32F-NEXT: j .LBB42_9 ; @@ -3529,8 +3531,8 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB42_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -3633,42 +3635,43 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) 
-; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB43_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB43_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_11 ; RV32ZVE32F-NEXT: .LBB43_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_12 ; RV32ZVE32F-NEXT: .LBB43_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_13 ; RV32ZVE32F-NEXT: .LBB43_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_14 ; RV32ZVE32F-NEXT: .LBB43_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_15 ; RV32ZVE32F-NEXT: .LBB43_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_16 ; RV32ZVE32F-NEXT: .LBB43_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_9 ; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3683,45 +3686,44 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_2 ; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_3 ; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: 
beqz a0, .LBB43_4 ; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_5 ; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_6 ; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3729,7 +3731,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_7 ; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3737,7 +3739,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_8 ; RV32ZVE32F-NEXT: j .LBB43_9 ; @@ -3773,8 +3775,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -3879,42 +3881,43 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB44_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB44_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, 
a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11 ; RV32ZVE32F-NEXT: .LBB44_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_12 ; RV32ZVE32F-NEXT: .LBB44_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_13 ; RV32ZVE32F-NEXT: .LBB44_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_14 ; RV32ZVE32F-NEXT: .LBB44_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_15 ; RV32ZVE32F-NEXT: .LBB44_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_16 ; RV32ZVE32F-NEXT: .LBB44_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_9 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3929,45 +3932,44 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2 ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3 ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4 ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 -; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6 ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3975,7 +3977,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, 
ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7 ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -3983,7 +3985,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8 ; RV32ZVE32F-NEXT: j .LBB44_9 ; @@ -4021,8 +4023,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4132,42 +4134,43 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB45_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB45_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11 ; RV32ZVE32F-NEXT: .LBB45_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV32ZVE32F-NEXT: .LBB45_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13 ; RV32ZVE32F-NEXT: .LBB45_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14 ; RV32ZVE32F-NEXT: .LBB45_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15 ; RV32ZVE32F-NEXT: .LBB45_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16 ; RV32ZVE32F-NEXT: .LBB45_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13 ; RV32ZVE32F-NEXT: 
vsetivli zero, 1, e32, m2, ta, ma @@ -4182,45 +4185,44 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4228,7 +4230,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4236,7 +4238,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8 ; RV32ZVE32F-NEXT: j .LBB45_9 ; @@ -4273,8 +4275,8 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB45_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4377,42 +4379,43 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB46_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB46_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11 ; RV32ZVE32F-NEXT: .LBB46_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV32ZVE32F-NEXT: .LBB46_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13 ; RV32ZVE32F-NEXT: .LBB46_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14 ; RV32ZVE32F-NEXT: .LBB46_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15 ; RV32ZVE32F-NEXT: .LBB46_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16 ; RV32ZVE32F-NEXT: .LBB46_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9 ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4427,45 +4430,44 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3 ; 
RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4473,7 +4475,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4481,7 +4483,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8 ; RV32ZVE32F-NEXT: j .LBB46_9 ; @@ -4518,8 +4520,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB46_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4624,42 +4626,43 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) ; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, 
zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB47_10 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi s2, t0, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB47_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11 ; RV32ZVE32F-NEXT: .LBB47_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV32ZVE32F-NEXT: .LBB47_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13 ; RV32ZVE32F-NEXT: .LBB47_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14 ; RV32ZVE32F-NEXT: .LBB47_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15 ; RV32ZVE32F-NEXT: .LBB47_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16 ; RV32ZVE32F-NEXT: .LBB47_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4674,45 +4677,44 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 
4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, t0, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4720,7 +4722,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a7, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, t0, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4728,7 +4730,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, t0, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8 ; RV32ZVE32F-NEXT: j .LBB47_9 ; @@ -4769,8 +4771,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB47_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a0, a5, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -4878,42 +4880,43 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: lw a4, 52(a0) ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t0, 40(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB48_10 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB48_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; 
RV32ZVE32F-NEXT: bnez a0, .LBB48_15 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4928,53 +4931,52 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4982,7 +4984,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8 ; RV32ZVE32F-NEXT: j .LBB48_9 ; @@ -5019,8 +5021,8 @@ 
define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: .LBB48_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -5121,42 +5123,43 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a4, 52(a0) ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t0, 40(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB49_10 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB49_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5171,53 +5174,52 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2 ; RV32ZVE32F-NEXT: .LBB49_11: # 
%cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5225,7 +5227,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8 ; RV32ZVE32F-NEXT: j .LBB49_9 ; @@ -5262,8 +5264,8 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB49_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -5365,42 +5367,43 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a4, 52(a0) ; RV32ZVE32F-NEXT: lw a5, 48(a0) ; RV32ZVE32F-NEXT: lw a6, 44(a0) -; RV32ZVE32F-NEXT: lw a7, 40(a0) -; RV32ZVE32F-NEXT: lw t0, 36(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 28(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 20(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw s0, 12(a0) -; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw t0, 40(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; 
RV32ZVE32F-NEXT: lw t2, 32(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t4, 24(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) +; RV32ZVE32F-NEXT: lw t6, 16(a0) +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s1, a1, 1 -; RV32ZVE32F-NEXT: bnez s1, .LBB50_10 +; RV32ZVE32F-NEXT: vmv.x.s a7, v0 +; RV32ZVE32F-NEXT: andi s2, a7, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB50_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5415,53 +5418,52 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: addi sp, sp, 16 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store -; RV32ZVE32F-NEXT: lw s1, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw s1, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a7, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 4(a0) -; RV32ZVE32F-NEXT: sw t6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; 
RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t0, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a7, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw a7, 0(a0) +; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a7, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5469,7 +5471,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a5, 0(a0) ; RV32ZVE32F-NEXT: sw a4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a7, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8 ; RV32ZVE32F-NEXT: j .LBB50_9 ; @@ -5508,8 +5510,8 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB50_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a0, a4, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -5653,31 +5655,32 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB51_10 +; RV32ZVE32F-NEXT: vmv.x.s a2, v0 +; RV32ZVE32F-NEXT: andi s2, a2, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 +; RV32ZVE32F-NEXT: bnez s2, .LBB51_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a2, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_11 ; RV32ZVE32F-NEXT: .LBB51_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a2, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_12 ; RV32ZVE32F-NEXT: .LBB51_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a2, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_13 ; RV32ZVE32F-NEXT: .LBB51_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a2, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_14 ; RV32ZVE32F-NEXT: .LBB51_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a2, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_15 ; RV32ZVE32F-NEXT: .LBB51_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a2, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_16 ; RV32ZVE32F-NEXT: .LBB51_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a2, -128 ; 
RV32ZVE32F-NEXT: beqz a0, .LBB51_9 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5697,13 +5700,12 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: addi sp, sp, 32 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store -; RV32ZVE32F-NEXT: lw a2, 4(a0) +; RV32ZVE32F-NEXT: lw a1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a2, 4(s2) +; RV32ZVE32F-NEXT: sw a1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) -; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: andi a0, a2, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_2 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5711,7 +5713,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw s1, 4(a0) ; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: andi a0, a2, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_3 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5719,7 +5721,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t6, 0(a0) ; RV32ZVE32F-NEXT: sw t5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: andi a0, a2, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_4 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -5727,7 +5729,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t4, 0(a0) ; RV32ZVE32F-NEXT: sw t3, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: andi a0, a2, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_5 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5735,7 +5737,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t2, 0(a0) ; RV32ZVE32F-NEXT: sw t1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: andi a0, a2, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_6 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5743,7 +5745,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw t0, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: andi a0, a2, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_7 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5751,7 +5753,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, a1, -128 +; RV32ZVE32F-NEXT: andi a0, a2, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8 ; RV32ZVE32F-NEXT: j .LBB51_9 ; @@ -6025,18 +6027,18 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; 
RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) +; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse16.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v8, (a1) +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void @@ -6198,8 +6200,8 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: .LBB58_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6221,8 +6223,8 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB58_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6331,8 +6333,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB59_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6354,8 +6356,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB59_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6465,8 +6467,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB60_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6489,8 +6491,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB60_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6603,8 +6605,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB61_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -6626,8 +6628,8 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB61_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -6847,18 +6849,18 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: ld a3, 16(a0) -; RV64ZVE32F-NEXT: ld a0, 8(a0) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 24(a0) +; RV64ZVE32F-NEXT: ld a3, 8(a0) +; RV64ZVE32F-NEXT: ld a0, 16(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a2) +; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a0) -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV64ZVE32F-NEXT: vse32.v v9, (a3) +; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v8, (a1) +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: ret call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void @@ -7023,8 +7025,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: .LBB68_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7047,8 +7049,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB68_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7160,8 +7162,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB69_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7184,8 +7186,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB69_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7301,8 +7303,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB70_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7326,8 +7328,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB70_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7446,8 +7448,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: .LBB71_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7470,8 +7472,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB71_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7584,8 +7586,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB72_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7608,8 +7610,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB72_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; 
RV64ZVE32F-NEXT: bnez a2, .LBB72_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7726,8 +7728,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB73_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7751,8 +7753,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) ; RV64ZVE32F-NEXT: .LBB73_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -7868,8 +7870,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -7892,8 +7894,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB74_9: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 @@ -8403,81 +8405,81 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB81_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_10 ; RV32ZVE32F-NEXT: .LBB81_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_11 ; RV32ZVE32F-NEXT: .LBB81_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_12 ; RV32ZVE32F-NEXT: .LBB81_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_13 ; RV32ZVE32F-NEXT: .LBB81_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez 
a1, .LBB81_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_14 ; RV32ZVE32F-NEXT: .LBB81_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB81_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB81_15 ; RV32ZVE32F-NEXT: .LBB81_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_16 ; RV32ZVE32F-NEXT: .LBB81_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_2 ; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_3 ; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_4 ; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_5 ; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_6 ; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB81_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB81_7 ; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_8 ; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -8510,8 +8512,8 @@ 
define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: .LBB81_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -8603,81 +8605,81 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB82_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_10 ; RV32ZVE32F-NEXT: .LBB82_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_11 ; RV32ZVE32F-NEXT: .LBB82_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_12 ; RV32ZVE32F-NEXT: .LBB82_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_13 ; RV32ZVE32F-NEXT: .LBB82_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_14 ; RV32ZVE32F-NEXT: .LBB82_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB82_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB82_15 ; RV32ZVE32F-NEXT: .LBB82_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_16 ; RV32ZVE32F-NEXT: .LBB82_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_2 ; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_3 ; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_4 +; 
RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_4 ; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_5 ; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_6 ; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB82_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB82_7 ; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_8 ; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -8710,8 +8712,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB82_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -8805,81 +8807,81 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB83_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_10 ; RV32ZVE32F-NEXT: .LBB83_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_11 ; RV32ZVE32F-NEXT: .LBB83_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_12 +; 
RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_12 ; RV32ZVE32F-NEXT: .LBB83_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_13 ; RV32ZVE32F-NEXT: .LBB83_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_14 ; RV32ZVE32F-NEXT: .LBB83_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB83_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB83_15 ; RV32ZVE32F-NEXT: .LBB83_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_16 ; RV32ZVE32F-NEXT: .LBB83_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_2 ; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_3 ; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_4 ; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_5 ; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_6 ; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB83_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB83_7 ; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: 
vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_8 ; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -8914,8 +8916,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB83_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9014,81 +9016,81 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB84_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_10 ; RV32ZVE32F-NEXT: .LBB84_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_11 ; RV32ZVE32F-NEXT: .LBB84_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_12 ; RV32ZVE32F-NEXT: .LBB84_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_13 ; RV32ZVE32F-NEXT: .LBB84_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_14 ; RV32ZVE32F-NEXT: .LBB84_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB84_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB84_15 ; RV32ZVE32F-NEXT: .LBB84_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_16 ; RV32ZVE32F-NEXT: .LBB84_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_2 ; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; 
RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_3 ; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_4 ; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_5 ; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_6 ; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB84_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB84_7 ; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_8 ; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9122,8 +9124,8 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: .LBB84_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9215,81 +9217,81 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB85_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_10 +; 
RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_10 ; RV32ZVE32F-NEXT: .LBB85_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_11 ; RV32ZVE32F-NEXT: .LBB85_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_12 ; RV32ZVE32F-NEXT: .LBB85_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_13 ; RV32ZVE32F-NEXT: .LBB85_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_14 ; RV32ZVE32F-NEXT: .LBB85_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB85_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB85_15 ; RV32ZVE32F-NEXT: .LBB85_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_16 ; RV32ZVE32F-NEXT: .LBB85_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_2 ; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_3 ; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_4 ; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_5 ; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_6 ; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; 
RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB85_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB85_7 ; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_8 ; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9323,8 +9325,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB85_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9418,81 +9420,81 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB86_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_10 ; RV32ZVE32F-NEXT: .LBB86_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_11 ; RV32ZVE32F-NEXT: .LBB86_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_12 ; RV32ZVE32F-NEXT: .LBB86_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_13 ; RV32ZVE32F-NEXT: .LBB86_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_14 ; RV32ZVE32F-NEXT: .LBB86_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB86_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB86_15 ; RV32ZVE32F-NEXT: .LBB86_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_16 ; RV32ZVE32F-NEXT: .LBB86_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: 
beqz a0, .LBB86_2 ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_3 ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_4 ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_5 ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_6 ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB86_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB86_7 ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_8 ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9530,8 +9532,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB86_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9628,81 +9630,81 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; 
RV32ZVE32F-NEXT: bnez a1, .LBB87_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB87_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_10 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_11 ; RV32ZVE32F-NEXT: .LBB87_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_12 ; RV32ZVE32F-NEXT: .LBB87_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_13 ; RV32ZVE32F-NEXT: .LBB87_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_14 ; RV32ZVE32F-NEXT: .LBB87_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB87_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB87_15 ; RV32ZVE32F-NEXT: .LBB87_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_16 ; RV32ZVE32F-NEXT: .LBB87_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_2 ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_3 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_4 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_5 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_6 +; 
RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_6 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB87_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB87_7 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_8 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9736,8 +9738,8 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -9827,81 +9829,81 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB88_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_10 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_11 ; RV32ZVE32F-NEXT: .LBB88_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_12 ; RV32ZVE32F-NEXT: .LBB88_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_13 ; RV32ZVE32F-NEXT: .LBB88_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_14 ; RV32ZVE32F-NEXT: .LBB88_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB88_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB88_15 ; RV32ZVE32F-NEXT: .LBB88_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_16 ; RV32ZVE32F-NEXT: .LBB88_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: 
.LBB88_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_2 ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_3 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_4 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_5 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_6 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB88_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB88_7 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_8 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -9935,8 +9937,8 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10027,81 +10029,81 @@ define void 
@mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB89_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_10 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_11 ; RV32ZVE32F-NEXT: .LBB89_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_12 ; RV32ZVE32F-NEXT: .LBB89_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_13 ; RV32ZVE32F-NEXT: .LBB89_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_14 ; RV32ZVE32F-NEXT: .LBB89_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB89_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB89_15 ; RV32ZVE32F-NEXT: .LBB89_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_16 ; RV32ZVE32F-NEXT: .LBB89_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_2 ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_3 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_4 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, 
a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_5 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_6 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB89_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB89_7 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_8 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10137,8 +10139,8 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10250,81 +10252,81 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_9 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 +; RV32ZVE32F-NEXT: bnez a2, .LBB90_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_10 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_10 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_11 ; RV32ZVE32F-NEXT: .LBB90_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_12 ; RV32ZVE32F-NEXT: .LBB90_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_13 ; RV32ZVE32F-NEXT: .LBB90_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_14 ; RV32ZVE32F-NEXT: .LBB90_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; 
RV32ZVE32F-NEXT: bnez a1, .LBB90_15 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_15 ; RV32ZVE32F-NEXT: .LBB90_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_16 ; RV32ZVE32F-NEXT: .LBB90_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_2 ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_3 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_4 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa3, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa3, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_5 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa4, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_6 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa5, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa5, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_7 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: fsd fa6, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: fsd fa6, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_8 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -10466,8 +10468,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, 
ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10483,8 +10485,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB91_8: # %else8 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 @@ -10496,8 +10498,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB91_10: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 @@ -10520,8 +10522,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB91_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else20 @@ -10542,8 +10544,8 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB91_20: # %else26 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27 @@ -10665,11 +10667,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vslidedown.vi v10, v10, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: ret ; @@ -10698,8 +10700,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 4 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 @@ -10715,8 +10717,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: 
vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_8: # %else8 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 @@ -10728,8 +10730,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB92_10: # %else10 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 64 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 @@ -10752,8 +10754,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 1024 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 @@ -10774,8 +10776,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_19: # %else22 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 51 +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23 @@ -10796,8 +10798,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB92_23: # %else26 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else28 @@ -10821,8 +10823,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_28: # %else34 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 45 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else36 @@ -10839,8 +10841,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_32: # %else40 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 42 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41 @@ -10853,8 +10855,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, 
(a2) ; RV64ZVE32F-NEXT: .LBB92_34: # %else42 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 41 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else44 @@ -10878,8 +10880,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB92_39: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 37 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else52 @@ -10901,8 +10903,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB92_44: # %else58 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 33 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index b3011d0f01cab..86c28247e97ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -401,54 +401,41 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 18 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: addi a3, a2, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: vle64.v v8, (a3) -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 3 -; RV32-NEXT: add a3, a4, a3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v0, (a2) -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v0, v24 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vmseq.vv v7, v24, v8 ; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle64.v v8, (a2) +; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a2, a0, 3 -; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v0, v16, v24 +; RV32-NEXT: vmseq.vv v0, v16, v8 ; RV32-NEXT: addi a0, a1, 128 -; RV32-NEXT: vse64.v v8, (a0), v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, 
sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vse64.v v24, (a0), v0.t +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vse64.v v8, (a1), v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 18 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index 7be015e26b098..93b4f7d2a9c9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -135,16 +135,16 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -271,8 +271,8 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -315,8 +315,8 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -393,16 +393,16 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -437,16 +437,16 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> 
@vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -481,16 +481,16 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -525,16 +525,16 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -569,17 +569,9 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -588,43 +580,36 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, 
%lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: vmv.v.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index 03624113a8262..c0bd49cc9c5cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ -163,12 +163,12 @@ define i32 @reduce_sum_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, -32 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -192,12 +192,12 @@ define i32 @reduce_sum_16xi32_prefix6(ptr %p) { ; CHECK-NEXT: li a1, 192 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; 
CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -221,10 +221,10 @@ define i32 @reduce_sum_16xi32_prefix7(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredsum.vs v8, v10, v8 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -248,9 +248,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vredsum.vs v8, v10, v8 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -535,12 +535,12 @@ define i32 @reduce_xor_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, -32 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -576,17 +576,17 @@ define i32 @reduce_and_16xi32_prefix2(ptr %p) { define i32 @reduce_and_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_and_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredand.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -623,12 +623,12 @@ define i32 @reduce_or_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, -32 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ 
-668,17 +668,17 @@ define i32 @reduce_smax_16xi32_prefix2(ptr %p) { define i32 @reduce_smax_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smax_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredmax.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -713,17 +713,17 @@ define i32 @reduce_smin_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smin_16xi32_prefix5: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredmin.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -760,12 +760,12 @@ define i32 @reduce_umax_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: li a1, -32 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v8, -1 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v10, -1 +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsext.vf4 v12, v8 -; CHECK-NEXT: vand.vv v8, v10, v12 +; CHECK-NEXT: vsext.vf4 v12, v10 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -800,17 +800,17 @@ define i32 @reduce_umin_16xi32_prefix2(ptr %p) { define i32 @reduce_umin_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_umin_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vredminu.vs v8, v10, v10 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -830,9 +830,9 @@ define float 
@reduce_fadd_16xf32_prefix2(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xf32_prefix2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vfredusum.vs v8, v9, v8 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 @@ -845,17 +845,17 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { define float @reduce_fadd_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v10, v8, 6 +; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v10, v8, 7 -; CHECK-NEXT: vfredusum.vs v8, v10, v8 +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 9df160bf30f00..7adaaa05f9dd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -177,8 +177,8 @@ declare float @llvm.vp.reduce.fadd.v64f32(float, <64 x float>, <64 x i1>, i32) define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB8_2 @@ -193,8 +193,8 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret @@ -205,8 +205,8 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB9_2 @@ -221,8 +221,8 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 7dcfb247d37cb..a6763fa22822e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1853,9 +1853,9 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 @@ -2188,8 +2188,8 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) @@ -2286,9 +2286,9 @@ define double @vreduce_fminimum_v64f64_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vle64.v v16, (a2) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vle64.v v0, (a1) @@ -2563,9 +2563,9 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 @@ -2898,8 +2898,8 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) @@ -2996,9 +2996,9 @@ define double @vreduce_fmaximum_v64f64_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vle64.v v16, (a2) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vle64.v v0, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 02a989a969960..016f95bfef7e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -801,8 +801,8 @@ declare i32 
@llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32) define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB49_2 @@ -817,8 +817,8 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> ; CHECK-NEXT: sltu a1, a1, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret @@ -1750,9 +1750,9 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: lui a2, %hi(.LCPI72_0) ; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0) +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle8.v v12, (a2) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vid.v v16 @@ -1794,9 +1794,9 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: li a3, 32 -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: lui a2, %hi(.LCPI72_0) ; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0) +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle8.v v12, (a2) ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vid.v v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 6c75c9b9c2949..28ce6a12c4c89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -1540,22 +1540,21 @@ define i64 @vwreduce_add_v64i64(ptr %x) { ; RV32-NEXT: vslidedown.vi v24, v8, 16 ; RV32-NEXT: vslidedown.vi v0, v16, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv4r.v v8, v0 -; RV32-NEXT: vwadd.vv v0, v24, v8 +; RV32-NEXT: vwadd.vv v8, v24, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vwadd.vv v0, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v0, v8 ; RV32-NEXT: vmv.s.x v16, zero ; RV32-NEXT: vredsum.vs v8, v8, v16 @@ -1588,22 +1587,21 @@ define i64 @vwreduce_add_v64i64(ptr %x) { ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: vslidedown.vi v0, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vmv4r.v v8, v0 -; RV64-NEXT: vwadd.vv v0, v24, v8 +; RV64-NEXT: vwadd.vv v8, v24, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV64-NEXT: 
vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vwadd.vv v0, v8, v16 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v8, v0, v8 ; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vredsum.vs v8, v8, v16 @@ -1639,22 +1637,21 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) { ; RV32-NEXT: vslidedown.vi v24, v8, 16 ; RV32-NEXT: vslidedown.vi v0, v16, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv4r.v v8, v0 -; RV32-NEXT: vwaddu.vv v0, v24, v8 +; RV32-NEXT: vwaddu.vv v8, v24, v0 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vwaddu.vv v0, v8, v16 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v0, v8 ; RV32-NEXT: vmv.s.x v16, zero ; RV32-NEXT: vredsum.vs v8, v8, v16 @@ -1687,22 +1684,21 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) { ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: vslidedown.vi v0, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vmv4r.v v8, v0 -; RV64-NEXT: vwaddu.vv v0, v24, v8 +; RV64-NEXT: vwaddu.vv v8, v24, v0 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vwaddu.vv v0, v8, v16 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v8, v0, v8 ; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vredsum.vs v8, v8, v16 @@ -2286,9 +2282,9 @@ define i64 @vreduce_and_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -2871,9 +2867,9 @@ define i64 @vreduce_or_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -4074,9 +4070,9 @@ define i64 @vreduce_smin_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, 
(a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -4659,9 +4655,9 @@ define i64 @vreduce_smax_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -5244,9 +5240,9 @@ define i64 @vreduce_umin_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) @@ -5829,9 +5825,9 @@ define i64 @vreduce_umax_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: addi a2, a0, 384 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vle64.v v0, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll index a1f010f98ab40..dc0f4e7430555 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -24,8 +24,8 @@ define zeroext i1 @vpreduce_or_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -40,8 +40,8 @@ define zeroext i1 @vpreduce_xor_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -72,8 +72,8 @@ define zeroext i1 @vpreduce_or_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -88,8 +88,8 @@ define zeroext i1 @vpreduce_xor_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -120,8 +120,8 @@ define zeroext i1 @vpreduce_or_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v4i1: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -136,8 +136,8 @@ define zeroext i1 @vpreduce_xor_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -168,8 +168,8 @@ define zeroext i1 @vpreduce_or_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 ; CHECK-LABEL: vpreduce_or_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -184,8 +184,8 @@ define zeroext i1 @vpreduce_xor_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i3 ; CHECK-LABEL: vpreduce_xor_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -264,8 +264,8 @@ define zeroext i1 @vpreduce_or_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-LABEL: vpreduce_or_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -280,8 +280,8 @@ define zeroext i1 @vpreduce_xor_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-LABEL: vpreduce_xor_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -296,8 +296,8 @@ define zeroext i1 @vpreduce_add_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -312,8 +312,8 @@ define zeroext i1 @vpreduce_add_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -328,8 +328,8 @@ define zeroext i1 @vpreduce_add_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i3 ; CHECK-LABEL: vpreduce_add_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -344,8 +344,8 @@ define zeroext i1 @vpreduce_add_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, 
i3 ; CHECK-LABEL: vpreduce_add_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -360,8 +360,8 @@ define zeroext i1 @vpreduce_add_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-LABEL: vpreduce_add_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: xor a0, a1, a0 @@ -488,8 +488,8 @@ define zeroext i1 @vpreduce_smin_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -504,8 +504,8 @@ define zeroext i1 @vpreduce_smin_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -520,8 +520,8 @@ define zeroext i1 @vpreduce_smin_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -536,8 +536,8 @@ define zeroext i1 @vpreduce_smin_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i ; CHECK-LABEL: vpreduce_smin_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -552,8 +552,8 @@ define zeroext i1 @vpreduce_smin_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m ; CHECK-LABEL: vpreduce_smin_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -568,8 +568,8 @@ define zeroext i1 @vpreduce_smin_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m ; CHECK-LABEL: vpreduce_smin_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -584,8 +584,8 @@ define zeroext i1 @vpreduce_smin_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m ; CHECK-LABEL: vpreduce_smin_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -600,8 +600,8 @@ define zeroext i1 
@vpreduce_umax_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -616,8 +616,8 @@ define zeroext i1 @vpreduce_umax_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -632,8 +632,8 @@ define zeroext i1 @vpreduce_umax_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -648,8 +648,8 @@ define zeroext i1 @vpreduce_umax_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i ; CHECK-LABEL: vpreduce_umax_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -664,8 +664,8 @@ define zeroext i1 @vpreduce_umax_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m ; CHECK-LABEL: vpreduce_umax_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -680,8 +680,8 @@ define zeroext i1 @vpreduce_umax_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m ; CHECK-LABEL: vpreduce_umax_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -696,8 +696,8 @@ define zeroext i1 @vpreduce_umax_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m ; CHECK-LABEL: vpreduce_umax_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 920d0d5fe7ba7..1f856d04ca89f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -123,15 +123,15 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: 
vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -246,8 +246,8 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -286,8 +286,8 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -357,15 +357,15 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -397,15 +397,15 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -437,15 +437,15 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, 
m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -477,15 +477,15 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -517,65 +517,54 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: 
vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -597,17 +586,20 @@ define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v7, v24, fa5 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index 716cf7b0f46fa..0f587232680df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi 
a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: 
fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 603f9397dc90f..0fb7e6a7de569 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 
+693,16 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; 
CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index a5adfc36887ad..927f96b644227 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -204,8 +204,8 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v9, v12, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -261,16 +261,16 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v10, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -439,8 +439,8 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -483,8 +483,8 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -561,16 +561,16 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -605,16 +605,16 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -649,16 +649,16 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -693,16 +693,16 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: 
vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -737,69 +737,59 @@ declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: 
vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll index a4ab67f41595d..80561be0ca2f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll @@ -115,17 +115,17 @@ define signext i32 @sad_2block_16xi8_as_i32(ptr %a, ptr %b, i32 signext %stridea ; CHECK-NEXT: vwaddu.vv v10, v9, v8 ; CHECK-NEXT: vminu.vv v8, v12, v13 ; CHECK-NEXT: vmaxu.vv v9, v12, v13 -; CHECK-NEXT: vsub.vv v8, v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a1, a1, a3 -; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vle8.v v12, (a1) +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vle8.v v13, (a1) +; CHECK-NEXT: vsub.vv v8, v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v14, v8 ; CHECK-NEXT: vwaddu.vv v16, v14, v10 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vminu.vv v8, v9, v12 -; CHECK-NEXT: vmaxu.vv v9, v9, v12 +; CHECK-NEXT: vminu.vv v8, v12, v13 +; CHECK-NEXT: vmaxu.vv v9, v12, v13 ; CHECK-NEXT: vsub.vv v8, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 4598bf67a2363..33e9cde4c30ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -1163,31 +1163,31 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFH-NEXT: addi a0, sp, 16 ; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH-NEXT: vslidedown.vi v7, v0, 8 +; ZVFH-NEXT: vslidedown.vi v6, v0, 8 ; ZVFH-NEXT: mv a0, a2 ; ZVFH-NEXT: bltu a2, a3, .LBB43_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: li a0, 64 ; ZVFH-NEXT: .LBB43_2: +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v6, v8, v24, v0.t +; ZVFH-NEXT: vmfeq.vv v7, v8, v24, v0.t ; ZVFH-NEXT: addi a0, a2, -64 ; ZVFH-NEXT: sltu a1, a2, a0 ; ZVFH-NEXT: addi a1, a1, -1 ; ZVFH-NEXT: and a0, a1, a0 +; ZVFH-NEXT: vmv1r.v v0, v6 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: add a1, sp, a1 +; ZVFH-NEXT: addi a1, a1, 16 +; ZVFH-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # 
Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v24, v16, v8, v0.t +; ZVFH-NEXT: vmfeq.vv v8, v16, v24, v0.t ; ZVFH-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH-NEXT: vslideup.vi v6, v24, 8 -; ZVFH-NEXT: vmv.v.v v0, v6 +; ZVFH-NEXT: vslideup.vi v7, v8, 8 +; ZVFH-NEXT: vmv.v.v v0, v7 ; ZVFH-NEXT: csrr a0, vlenb ; ZVFH-NEXT: slli a0, a0, 4 ; ZVFH-NEXT: add sp, sp, a0 @@ -2865,37 +2865,36 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v6, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB87_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB87_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v6, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v7, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v6, v24, 2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vslideup.vi v7, v8, 2 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 21bbca00921d6..5f3847e085055 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -611,10 +611,10 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: vle8.v v8, (a2) ; CHECK-NEXT: addi a2, a3, -128 ; CHECK-NEXT: sltu a4, a3, a2 -; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a2, a4, a2 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v6, v16, v8, v0.t @@ -622,7 +622,6 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -631,6 +630,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, 
<256 x i1> ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v6 @@ -660,8 +660,8 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB52_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -689,8 +689,8 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB53_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -1264,31 +1264,31 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 4 +; CHECK-NEXT: vslidedown.vi v6, v0, 4 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a3, .LBB99_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: .LBB99_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v6, v8, v24, v0.t +; CHECK-NEXT: vmseq.vv v7, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v6, v24, 4 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vslideup.vi v7, v8, 4 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -1301,8 +1301,8 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB100_2 @@ -1315,8 +1315,8 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: 
vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v25, v8, 4 @@ -1331,8 +1331,8 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB101_2 @@ -1345,8 +1345,8 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v25, v8, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll index 52596d8892411..d1980ee3b0a6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -151,8 +151,8 @@ declare <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB12_2 @@ -167,8 +167,8 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll index 609b4e9824892..925366e8b1d50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll @@ -33,8 +33,8 @@ define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x ; VLS-LABEL: concat_4xv2i32: ; VLS: # %bb.0: ; VLS-NEXT: vmv1r.v v13, v10 -; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vmv1r.v v12, v8 +; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vslideup.vi v13, v11, 2 ; VLS-NEXT: vslideup.vi v12, v9, 2 ; VLS-NEXT: vmv2r.v v8, v12 @@ -147,8 +147,8 @@ define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x ; VLS-NEXT: vmv1r.v v19, v14 ; VLS-NEXT: vmv1r.v v18, v12 ; VLS-NEXT: vmv1r.v v17, v10 -; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vmv1r.v v16, v8 +; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; VLS-NEXT: vslideup.vi v19, v15, 2 ; VLS-NEXT: vslideup.vi v18, v13, 2 ; VLS-NEXT: vslideup.vi v17, v11, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll 
index 8499086994bc0..d461fa8378cff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll @@ -164,11 +164,10 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64> define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) { ; CHECK-LABEL: m2_splat_into_slide_two_source: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vrgather.vi v12, v8, 0 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vrgather.vi v12, v8, 0 ; CHECK-NEXT: vslideup.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index 47d7baade8b49..d70ed2fb0e266 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -101,10 +101,10 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v32i1: ; NO-ZVBB: # %bb.0: ; NO-ZVBB-NEXT: li a0, 32 +; NO-ZVBB-NEXT: lui a1, %hi(.LCPI4_0) +; NO-ZVBB-NEXT: addi a1, a1, %lo(.LCPI4_0) ; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; NO-ZVBB-NEXT: lui a0, %hi(.LCPI4_0) -; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI4_0) -; NO-ZVBB-NEXT: vle8.v v8, (a0) +; NO-ZVBB-NEXT: vle8.v v8, (a1) ; NO-ZVBB-NEXT: vmv.v.i v10, 0 ; NO-ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 ; NO-ZVBB-NEXT: vrgather.vv v12, v10, v8 @@ -124,10 +124,10 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v64i1: ; NO-ZVBB: # %bb.0: ; NO-ZVBB-NEXT: li a0, 64 +; NO-ZVBB-NEXT: lui a1, %hi(.LCPI5_0) +; NO-ZVBB-NEXT: addi a1, a1, %lo(.LCPI5_0) ; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; NO-ZVBB-NEXT: lui a0, %hi(.LCPI5_0) -; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI5_0) -; NO-ZVBB-NEXT: vle8.v v8, (a0) +; NO-ZVBB-NEXT: vle8.v v8, (a1) ; NO-ZVBB-NEXT: vmv.v.i v12, 0 ; NO-ZVBB-NEXT: vmerge.vim v12, v12, 1, v0 ; NO-ZVBB-NEXT: vrgather.vv v16, v12, v8 @@ -147,10 +147,10 @@ define <128 x i1> @reverse_v128i1(<128 x i1> %a) { ; CHECK-LABEL: reverse_v128i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI6_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI6_0) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vrgather.vv v24, v16, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index ed381c1397d2d..82c57a9d90a0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -19,8 +19,8 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) { define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn2.v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -46,13 +46,12 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) { define <16 x i8> @trn2.v16i8(<16 
x i8> %v0, <16 x i8> %v1) { ; CHECK-LABEL: trn2.v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> @@ -97,8 +96,8 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn2.v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -279,8 +278,8 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn2.v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll index 5e93fdfc7a652..bf0eab77d0ac8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -390,8 +390,8 @@ declare <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -404,8 +404,8 @@ define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index 0e6b03bf16323..0e1105848440a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -75,9 +75,9 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) { ; CHECK-NO-MISALIGN: # %bb.0: ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0) -; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16 -; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2) ; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8 +; CHECK-NO-MISALIGN-NEXT: addi a3, a0, 16 +; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a3) ; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24 ; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a0) ; CHECK-NO-MISALIGN-NEXT: vle8.v v11, 
(a2) @@ -186,9 +186,9 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 6 -; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: addi a2, a0, 2 +; CHECK-NEXT: addi a3, a0, 6 +; CHECK-NEXT: vle16.v v10, (a3) ; CHECK-NEXT: addi a0, a0, 8 ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: vle16.v v11, (a2) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 6a8d2008de74d..5e64e9fbc1a2f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -480,14 +480,14 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x ; CHECK-NEXT: addi a5, a2, -16 ; CHECK-NEXT: sltu a2, a2, a5 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v9, 2 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vlse64.v v16, (a4), a1, v0.t -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: ret %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %m, i32 %evl) @@ -555,13 +555,13 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: li a4, 16 ; CHECK-RV32-NEXT: .LBB42_6: ; CHECK-RV32-NEXT: mul a5, a4, a2 -; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t -; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 +; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV32-NEXT: vse64.v v8, (a0) @@ -605,13 +605,13 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: li a3, 16 ; CHECK-RV64-NEXT: .LBB42_6: ; CHECK-RV64-NEXT: mul a5, a3, a2 -; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t -; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v0, v8 +; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll index dee422a4c17d1..35f123f1157f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll @@ -420,9 +420,9 @@ define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %strid ; CHECK-NEXT: addi a3, a2, -16 ; CHECK-NEXT: sltu a2, a2, a3 ; CHECK-NEXT: addi a2, a2, -1 -; 
CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index 9fa8ab39723f7..7513d31b54bd1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -54,8 +54,8 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zero ; CHECK-LABEL: vtrunc_v128i7_v128i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 8 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a1, .LBB4_2 @@ -68,8 +68,8 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zero ; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 128 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma @@ -243,75 +243,67 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v5, v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v0, 4 +; CHECK-NEXT: vslidedown.vi v25, v0, 8 ; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v5, 4 -; CHECK-NEXT: addi a2, a1, 640 +; CHECK-NEXT: vslidedown.vi v27, v25, 4 +; CHECK-NEXT: addi a3, a1, 640 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: vle64.v v8, (a3) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a7, -64 -; CHECK-NEXT: sltu a3, a7, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a4, a3, a2 -; CHECK-NEXT: addi a2, a4, -32 -; CHECK-NEXT: sltu a3, a4, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a2 -; CHECK-NEXT: addi a2, a3, -16 -; CHECK-NEXT: sltu a5, a3, a2 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a2, a5, a2 ; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a2, a2, a5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: addi a3, a7, -64 +; CHECK-NEXT: sltu a4, a7, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: addi a3, a4, -32 +; CHECK-NEXT: sltu a5, a4, a3 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a3, a5, a3 +; CHECK-NEXT: addi a5, a3, -16 +; CHECK-NEXT: sltu a6, a3, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a5, a6, a5 +; 
CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 24 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v26, v7, 4 ; CHECK-NEXT: bltu a3, a2, .LBB16_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: li a3, 64 -; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: vle64.v v16, (a5) ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v28, v26, 2 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: mul a3, a3, a6 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_4 +; CHECK-NEXT: bltu a7, a5, .LBB16_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a6, 64 ; CHECK-NEXT: .LBB16_4: @@ -332,10 +324,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: sltu t1, a6, t0 ; CHECK-NEXT: addi t1, t1, -1 ; CHECK-NEXT: and t0, t1, t0 -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: addi t0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (t0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr t1, vlenb +; CHECK-NEXT: li t2, 48 +; CHECK-NEXT: mul t1, t1, t2 +; CHECK-NEXT: add t1, sp, t1 +; CHECK-NEXT: addi t1, t1, 16 +; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: csrr t0, vlenb ; CHECK-NEXT: slli t0, t0, 4 @@ -346,19 +342,21 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a6, 16 ; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v20, v5, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v27, v25, 2 ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: slli a5, a5, 3 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; 
CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t +; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 @@ -371,13 +369,20 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: li a5, 32 ; CHECK-NEXT: .LBB16_8: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a5, -16 ; CHECK-NEXT: sltu a5, a5, a1 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a1, a5, a1 +; CHECK-NEXT: vmv1r.v v0, v27 +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: bltu a4, a2, .LBB16_10 ; CHECK-NEXT: # %bb.9: @@ -385,8 +390,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: .LBB16_10: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v6, v7, 2 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: mv a1, a7 @@ -401,13 +411,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: li a5, 56 ; CHECK-NEXT: mul a4, a4, a5 ; CHECK-NEXT: add a4, sp, a4 ; CHECK-NEXT: addi a4, a4, 16 ; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: li a5, 56 @@ -446,19 +456,18 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: sltu a1, a1, a4 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a4 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a4, a4, 5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: bltu a7, a2, .LBB16_14 ; CHECK-NEXT: # %bb.13: ; CHECK-NEXT: li a7, 16 ; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 40 @@ -466,6 +475,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v 
v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v8, 16 @@ -509,8 +519,8 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext ; CHECK-LABEL: vtrunc_v32i32_v32i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB17_2 @@ -523,8 +533,8 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll index 698c48bc55650..e28d55f46abcb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -390,8 +390,8 @@ declare <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 @@ -404,8 +404,8 @@ define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 ze ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 2c62cbd583d00..5601bd5ee7a3a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -377,8 +377,8 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -416,8 +416,8 @@ define <256 x i8> @vadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -1348,8 +1348,8 @@ 
declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1365,15 +1365,15 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1386,8 +1386,8 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1468,8 +1468,8 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1479,8 +1479,8 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index 507cf5cc6b80c..d414be76672ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1140,15 +1140,16 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zero ; RV32-LABEL: vand_vx_v11i64: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v16, v0 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: lui a3, 341 ; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vmerge.vxm v24, v24, a0, v0 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: ret ; @@ -1167,10 +1168,11 @@ define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext % ; 
RV32-LABEL: vand_vx_v11i64_unmasked: ; RV32: # %bb.0: ; RV32-NEXT: li a3, 32 +; RV32-NEXT: lui a4, 341 +; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: lui a3, 341 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vmerge.vxm v16, v16, a0, v0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index 01b07b4081e6d..77a095303675f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -298,37 +298,46 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index f32e2bbf37946..ae3dce497c6d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -379,8 +379,8 @@ declare <32 x double> @llvm.vp.fabs.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vfabs_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -393,8 +393,8 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index 0574773fb2fd9..e2e48cee3eacc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -862,51 +862,51 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a2) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: bltu a4, a1, .LBB50_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add 
a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -954,25 +954,25 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v0, v8, v24 ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: vmv.v.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll index ffa88e28d7dc8..c83a298cb501e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -390,37 +390,46 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: 
vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll index 17f851e172f81..60dbededb90a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -390,37 +390,46 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index 288efb0f1fc27..6c695b43d2718 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -626,51 +626,51 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a2) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: bltu a4, a1, .LBB50_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -718,25 +718,25 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; 
CHECK-NEXT: vfmadd.vv v0, v8, v24 ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: vmv.v.v v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index c36ec25c04f93..fbc4c56a91134 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -379,8 +379,8 @@ declare <32 x double> @llvm.vp.fneg.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -393,8 +393,8 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll index 6004eb4fe217a..988b200ae5365 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -379,8 +379,8 @@ declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 @@ -393,8 +393,8 @@ define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zero ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll index dd3a50cfd7737..05c7bd990642c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll @@ -105,13 +105,12 @@ 
define <64 x float> @vfwadd_v64f16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwadd.vv v8, v16, v24 +; CHECK-NEXT: vfwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v8, v16, v0 @@ -216,13 +215,12 @@ define <32 x double> @vfwadd_v32f32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwadd.vv v8, v16, v24 +; CHECK-NEXT: vfwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll index 7eaa1856ce221..5a57801d33b40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll @@ -105,13 +105,12 @@ define <64 x float> @vfwmul_v64f16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwmul.vv v8, v16, v24 +; CHECK-NEXT: vfwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v8, v16, v0 @@ -216,13 +215,12 @@ define <32 x double> @vfwmul_v32f32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwmul.vv v8, v16, v24 +; CHECK-NEXT: vfwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll index 8cf7c5f175865..2c706cad9742f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll @@ -105,13 +105,12 @@ define <64 x float> @vfwsub_v64f16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwsub.vv v8, v16, v24 +; CHECK-NEXT: vfwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, 
vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v8, v16, v0 @@ -216,13 +215,12 @@ define <32 x double> @vfwsub_v32f32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vfwsub.vv v8, v16, v24 +; CHECK-NEXT: vfwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 3db44e87109bd..9789afda9344a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -282,8 +282,8 @@ define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -325,8 +325,8 @@ define <256 x i8> @vmax_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1021,8 +1021,8 @@ declare <32 x i64> @llvm.vp.smax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1038,15 +1038,15 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmax.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmax_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1060,8 +1060,8 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, 
m8, ta, ma ; RV64-NEXT: vmax.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index c97c2232715f5..36b0a4642b616 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -281,8 +281,8 @@ define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -324,8 +324,8 @@ define <256 x i8> @vmaxu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1020,8 +1020,8 @@ declare <32 x i64> @llvm.vp.umax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1037,15 +1037,15 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmaxu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1059,8 +1059,8 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index eaa19110a2a28..adb0a30f34d35 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -282,8 +282,8 @@ define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head 
= insertelement <256 x i8> poison, i8 %b, i32 0 @@ -325,8 +325,8 @@ define <256 x i8> @vmin_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1021,8 +1021,8 @@ declare <32 x i64> @llvm.vp.smin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1038,15 +1038,15 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmin.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmin_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1060,8 +1060,8 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmin.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 48175e5b905ba..671ce82d4ae79 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -281,8 +281,8 @@ define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -324,8 +324,8 @@ define <256 x i8> @vminu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 @@ -1020,8 +1020,8 @@ declare <32 x i64> @llvm.vp.umin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vminu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: 
vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 @@ -1037,15 +1037,15 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vminu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vminu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 @@ -1059,8 +1059,8 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vminu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index a13f1eed8efb1..028fb9a626f02 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -297,10 +297,10 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 @@ -1882,10 +1882,10 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: vmv8r.v v8, v24 @@ -1904,9 +1904,9 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: ret @@ -1933,10 +1933,10 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: 
vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -1961,9 +1961,9 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -1991,10 +1991,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2020,9 +2020,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2051,10 +2051,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2077,10 +2077,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 -; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2109,10 +2109,10 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2137,9 +2137,9 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: 
vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2167,10 +2167,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2196,9 +2196,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2227,10 +2227,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2253,10 +2253,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2270,8 +2270,8 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB93_2 @@ -2284,10 +2284,10 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2312,9 +2312,9 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, 
v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2327,8 +2327,8 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB94_2 @@ -2341,10 +2341,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2370,9 +2370,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2386,8 +2386,8 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB95_2 @@ -2400,10 +2400,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2429,9 +2429,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2457,9 +2457,9 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a3, a1, a2 ; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 @@ -2467,8 +2467,8 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; 
RV32-NEXT: .LBB96_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; @@ -2488,9 +2488,9 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 9ef89352e65e5..f204d812c14f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -377,9 +377,9 @@ define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -405,9 +405,9 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: sltu a3, a3, a4 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a4 -; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: addi a3, a2, -32 @@ -419,17 +419,17 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_6: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a1), v0.t ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index 466448a7a05a2..9f0561b394b81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1193,17 +1193,17 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: csrr a0, 
vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb @@ -1229,9 +1229,9 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index cd9a38d5167d5..0c180cd148b81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1685,10 +1685,10 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: ret @@ -1718,12 +1718,12 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV64-NEXT: addi a0, a2, -16 ; RV64-NEXT: sltu a1, a2, a0 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -1753,10 +1753,10 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1766,51 +1766,44 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, 
(a1) +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 16 +; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vmv4r.v v0, v24 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v0 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsext.vf2 v0, v24 +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsext.vf2 v24, v0 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB80_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB80_2: +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vslidedown.vi v0, v7, 2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1838,10 +1831,10 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1878,21 +1871,21 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB81_2: +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 @@ 
-1925,10 +1918,10 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1965,21 +1958,21 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB82_2: +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index c0aa735614b21..f396790f4f178 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -295,9 +295,9 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 291629de6dcfa..df2c83028e5df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -386,8 +386,8 @@ define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -425,8 +425,8 @@ define <256 x i8> @vsadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call 
<256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -442,8 +442,8 @@ define <256 x i8> @vsadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1361,8 +1361,8 @@ declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsadd_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1378,15 +1378,15 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsadd_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1399,8 +1399,8 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1462,8 +1462,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1473,8 +1473,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1491,8 +1491,8 @@ define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1502,8 +1502,8 @@ define <32 x i64> 
@vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index d38ee1148e894..f50dadf019910 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -382,8 +382,8 @@ define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -421,8 +421,8 @@ define <256 x i8> @vsaddu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -438,8 +438,8 @@ define <256 x i8> @vsaddu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1357,8 +1357,8 @@ declare <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vsaddu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1374,15 +1374,15 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vsaddu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1395,8 +1395,8 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; 
RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1458,8 +1458,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1469,8 +1469,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1487,8 +1487,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1498,8 +1498,8 @@ define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll index 12d96fbfb88d6..4f533f2055bf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll @@ -24,17 +24,17 @@ define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 { ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: addi a4, a3, 128 +; CHECK-NEXT: addi a5, a3, 384 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: addi a2, a3, 128 -; CHECK-NEXT: addi a4, a3, 384 -; CHECK-NEXT: vle8.v v8, (a4) -; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: vle8.v v8, (a5) +; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: mul a2, a2, a5 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, a1, 128 ; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 @@ -48,10 +48,10 @@ define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 { ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi 
a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: vle8.v v8, (a2) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle8.v v24, (a2) +; CHECK-NEXT: vle8.v v24, (a4) ; CHECK-NEXT: vle8.v v0, (a3) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index d05f580ea7d22..0a2ed3eb1ffbf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -175,19 +175,18 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: addi a0, a3, -128 ; CHECK-NEXT: sltu a4, a3, a0 -; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: vle8.v v0, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a0, a4, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0 ; CHECK-NEXT: bltu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -196,6 +195,7 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb @@ -221,39 +221,39 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle8.v v24, (a1) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 
+; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 @@ -437,12 +437,12 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -456,15 +456,41 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v @@ -594,12 +620,12 @@ define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> % ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # 
Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 2caa2ff41a7d9..b82ca70477ba3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -399,8 +399,8 @@ define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -440,8 +440,8 @@ define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -458,8 +458,8 @@ define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) @@ -1401,8 +1401,8 @@ declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vssub_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1418,15 +1418,15 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vssub_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1440,8 +1440,8 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x 
i1> %m, i32 %evl) @@ -1504,8 +1504,8 @@ define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1516,8 +1516,8 @@ define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1534,8 +1534,8 @@ define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vssub.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1546,8 +1546,8 @@ define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vssub.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 6313f31bc1a61..6d8ed563f02bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -394,8 +394,8 @@ define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -435,8 +435,8 @@ define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -453,8 +453,8 @@ define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t -; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) 
@@ -1396,8 +1396,8 @@ declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vssubu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 @@ -1413,15 +1413,15 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vssubu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB108_2 @@ -1435,8 +1435,8 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) @@ -1499,8 +1499,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1511,8 +1511,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) @@ -1529,8 +1529,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; @@ -1541,8 +1541,8 @@ define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: li a0, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll index b1726be941e3e..d6ca6c5a4b83d 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwadd_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwadd.vv v8, v16, v24 +; CHECK-NEXT: vwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwadd.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwadd_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwadd.vv v8, v16, v24 +; CHECK-NEXT: vwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwadd.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwadd_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwadd.vv v8, v16, v24 +; CHECK-NEXT: vwadd.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwadd.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index f6d9695c51490..61378a424ecba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwaddu_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwaddu.vv v8, v16, v24 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwaddu.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwaddu_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwaddu.vv v8, v16, v24 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) 
# Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwaddu.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwaddu_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwaddu.vv v8, v16, v24 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwaddu.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll index c87584ab63513..93927e10e607e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -289,13 +289,12 @@ define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmul.vv v8, v16, v24 +; CHECK-NEXT: vwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v8, v16, v0 @@ -337,13 +336,12 @@ define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmul.vv v8, v16, v24 +; CHECK-NEXT: vwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v8, v16, v0 @@ -384,13 +382,12 @@ define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmul.vv v8, v16, v24 +; CHECK-NEXT: vwmul.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmul.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll index a56984577ea74..ee114350a4323 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll @@ -281,13 +281,12 @@ define <128 x i16> @vwmulsu_v128i16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, 
v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulsu.vv v8, v24, v16 +; CHECK-NEXT: vwmulsu.vv v24, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v8, v0, v16 @@ -329,13 +328,12 @@ define <64 x i32> @vwmulsu_v64i32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulsu.vv v8, v24, v16 +; CHECK-NEXT: vwmulsu.vv v24, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v8, v0, v16 @@ -376,13 +374,12 @@ define <32 x i64> @vwmulsu_v32i64(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulsu.vv v8, v24, v16 +; CHECK-NEXT: vwmulsu.vv v24, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulsu.vv v8, v0, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll index b97c9654ad3cb..17a76ae5e7f75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll @@ -265,13 +265,12 @@ define <128 x i16> @vwmulu_v128i16(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulu.vv v8, v16, v24 +; CHECK-NEXT: vwmulu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v8, v16, v0 @@ -313,13 +312,12 @@ define <64 x i32> @vwmulu_v64i32(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulu.vv v8, v16, v24 +; CHECK-NEXT: vwmulu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v8, v16, v0 @@ -360,13 +358,12 @@ 
define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwmulu.vv v8, v16, v24 +; CHECK-NEXT: vwmulu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v8, v16, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll index 2782a5fbb1eae..a2675d59ade93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwsub_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsub.vv v8, v16, v24 +; CHECK-NEXT: vwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwsub_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsub.vv v8, v16, v24 +; CHECK-NEXT: vwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwsub_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsub.vv v8, v16, v24 +; CHECK-NEXT: vwsub.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v8, v16, v0 @@ -715,8 +712,8 @@ define <8 x i16> @vwsub_vx_v8i16_i16(ptr %x, ptr %y) { define <4 x i32> @vwsub_vx_v4i32_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_vx_v4i32_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: lb a1, 0(a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vwsub.vv v8, v10, v9 @@ -779,8 +776,8 @@ define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsub_vx_v2i64_i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lb a1, 
0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsub.vv v8, v10, v9 @@ -808,8 +805,8 @@ define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsub_vx_v2i64_i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lh a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsub.vv v8, v10, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index ccbc26c84d80d..1a9e3aac00341 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -263,13 +263,12 @@ define <128 x i16> @vwsubu_v128i16(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsubu.vv v8, v16, v24 +; CHECK-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v8, v16, v0 @@ -309,13 +308,12 @@ define <64 x i32> @vwsubu_v64i32(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsubu.vv v8, v16, v24 +; CHECK-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v8, v16, v0 @@ -354,13 +352,12 @@ define <32 x i64> @vwsubu_v32i64(ptr %x, ptr %y) nounwind { ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vslidedown.vi v8, v0, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: vwsubu.vv v8, v16, v24 +; CHECK-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v8, v16, v0 @@ -715,8 +712,8 @@ define <8 x i16> @vwsubu_vx_v8i16_i16(ptr %x, ptr %y) { define <4 x i32> @vwsubu_vx_v4i32_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_vx_v4i32_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vwsubu.vv v8, v10, v9 @@ -783,8 +780,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i8: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; 
RV64-NEXT: vwsubu.vv v8, v10, v9 @@ -816,8 +813,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: lhu a1, 0(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vwsubu.vv v8, v10, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll index f4d679cd57cac..df90dae379c06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -151,8 +151,8 @@ declare <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB12_2 @@ -167,8 +167,8 @@ define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 16 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index d464b491bbbe2..26a3e053bf7aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -135,16 +135,16 @@ declare @llvm.vp.floor.nxv8f16(, @vp_floor_nxv8f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -179,16 +179,16 @@ declare @llvm.vp.floor.nxv16f16(, @vp_floor_nxv16f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI8_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -223,16 +223,16 @@ declare @llvm.vp.floor.nxv32f16(, @vp_floor_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -359,8 +359,8 @@ define @vp_floor_nxv4f32( %va, @vp_floor_nxv8f32( %va, @vp_floor_nxv16f32( %va, @llvm.vp.floor.nxv2f64(, @vp_floor_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -569,16 +569,16 @@ declare @llvm.vp.floor.nxv4f64(, @vp_floor_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -613,16 +613,16 @@ declare @llvm.vp.floor.nxv7f64(, @vp_floor_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -657,16 +657,16 @@ declare @llvm.vp.floor.nxv8f64(, @vp_floor_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -705,66 +705,56 @@ define @vp_floor_nxv16f64( %va, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, 
@vfmax_vv_nxv4f16( %va, @vfmax_vv_nxv8f16( %va, @vfmax_vv_nxv16f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: li a4, 24 @@ -647,11 +631,12 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 @@ -683,13 +668,13 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 @@ -1027,13 +1012,13 @@ define @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 48baa12aa2e59..e942593924987 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -214,10 +214,7 @@ define @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_vv_nxv4f16( %va, @vfmin_vv_nxv8f16( %va, @vfmin_vv_nxv16f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 
-; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: li a4, 24 @@ -647,11 +631,12 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 @@ -683,13 +668,13 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 @@ -1027,13 +1012,13 @@ define @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index bb28ff5c6dc4f..aa845bd8bb0b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -448,8 +448,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -467,8 +467,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, 
e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -609,8 +609,8 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -628,8 +628,8 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -780,8 +780,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -799,8 +799,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1397,8 +1397,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1422,8 +1422,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1440,11 +1440,11 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1454,8 +1454,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: 
vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1473,8 +1473,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1485,18 +1485,18 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 @@ -1710,8 +1710,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1735,8 +1735,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1753,11 +1753,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -1767,8 +1767,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1786,8 +1786,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, 
e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -1798,18 +1798,18 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 @@ -2045,8 +2045,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2070,8 +2070,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2088,11 +2088,11 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2102,8 +2102,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -2121,8 +2121,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -2133,18 +2133,18 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; 
CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 @@ -2279,9 +2279,9 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 @@ -2412,9 +2412,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: snez a1, a1 @@ -2524,9 +2524,9 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, s1 @@ -2686,9 +2686,9 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 @@ -2819,9 +2819,9 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: snez a1, a1 @@ -2931,9 +2931,9 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, s1 @@ -3819,8 +3819,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; 
CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -3838,8 +3838,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -3978,8 +3978,8 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -3997,8 +3997,8 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4148,8 +4148,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4167,8 +4167,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4753,8 +4753,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -4778,8 +4778,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -4796,11 +4796,11 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # 
Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -4810,8 +4810,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4829,8 +4829,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -4841,18 +4841,18 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 @@ -5064,8 +5064,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5089,8 +5089,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5107,11 +5107,11 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5121,8 +5121,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli 
zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5140,8 +5140,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5152,18 +5152,18 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 @@ -5398,8 +5398,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s6 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5423,8 +5423,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s4 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5441,11 +5441,11 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5455,8 +5455,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5474,8 +5474,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; 
CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -5486,18 +5486,18 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 2 -; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v10, 4 ; CHECK-V-NEXT: lui a0, 16 ; CHECK-V-NEXT: addi a0, a0, -1 @@ -5633,9 +5633,9 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 @@ -5766,9 +5766,9 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti @@ -5867,9 +5867,9 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, a1 @@ -6019,9 +6019,9 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 @@ -6152,9 +6152,9 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti @@ -6253,9 +6253,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; 
CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll index 8f36aad817274..c45af61ced94f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -163,12 +163,11 @@ define @test_signed_v4f64_v4i16( %f) { ; CHECK-NEXT: vfmin.vf v12, v12, fa4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i16( %f) ret %x @@ -186,12 +185,11 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-NEXT: vfmin.vf v16, v16, fa4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i16( %f) ret %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index 84fb777c64b8c..bc5617957d7d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -960,141 +960,158 @@ define @fshr_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @llrint_nxv16i64_nxv16f32( %x, < ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll index 9fa8807ed4add..c9f91bf9def2c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll @@ -132,8 +132,8 @@ define @lrint_nxv16f32( %x, @intrinsic_viota_mask_m_nxv1i8_nxv1i1( ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: viota.m v8, v9, v0.t ; CHECK-NEXT: ret entry: @@ -1313,8 +1313,8 @@ define @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmsbf.m v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret @@ -1444,8 +1444,8 @@ define @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1( %ptrs0, %ptr ; RV32-LABEL: mgather_nxv16i64: ; RV32: # %bb.0: ; 
RV32-NEXT: vl8re64.v v24, (a0)
-; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a2, a0, 3
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: vslidedown.vx v7, v0, a2
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a1, a0
@@ -1216,20 +1216,35 @@ define void @mgather_nxv16i64( %ptrs0, %ptr
;
; RV64-LABEL: mgather_nxv16i64:
; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: sub sp, sp, a3
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vmv8r.v v16, v8
; RV64-NEXT: vl8re64.v v24, (a0)
-; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
-; RV64-NEXT: vluxei64.v v24, (zero), v8, v0.t
-; RV64-NEXT: vl8re64.v v8, (a1)
; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vl8re64.v v8, (a1)
; RV64-NEXT: srli a1, a0, 3
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a1
+; RV64-NEXT: vslidedown.vx v7, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t
+; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, a2, a0
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: vs8r.v v24, (a2)
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%p0 = call @llvm.vector.insert.nxv8p0.nxv16p0( undef, %ptrs0, i64 0)
%p1 = call @llvm.vector.insert.nxv8p0.nxv16p0( %p0, %ptrs1, i64 8)
@@ -2116,8 +2131,8 @@ define @mgather_baseidx_nxv32i8(ptr %base,
; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v10
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT: vmv1r.v v0, v8
+; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t
; RV64-NEXT: vmv4r.v v8, v12
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index fc8fdf4aaafe2..9bfa0f31dc3a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -1691,15 +1691,15 @@ declare @llvm.vector.insert.nxv8p0.nxv16p0( %val0, %val1, %ptrs0, %ptrs1, %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32: # %bb.0:
-; RV32-NEXT: vl4re32.v v24, (a0)
; RV32-NEXT: vl4re32.v v28, (a1)
-; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
+; RV32-NEXT: vl4re32.v v4, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a0
+; RV32-NEXT: vslidedown.vx v24, v0, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v4, v0.t
+; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
@@ -1708,25 +1708,36 @@ define void @mscatter_nxv16f64( %val0, %val0, %val0, %idxs @@
-1785,13 +1796,13 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64( %val0, %val0, %idxs diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index ebe89817630d0..a3ea462b6a737 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -204,8 +204,8 @@ define @vp_nearbyint_nxv4f16( %va, @llvm.vp.nearbyint.nxv8f16(, @vp_nearbyint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: frflags a0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -290,8 +290,8 @@ define @vp_nearbyint_nxv8f16( %va, @llvm.vp.nearbyint.nxv16f16(, < define @vp_nearbyint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: frflags a0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -376,8 +376,8 @@ define @vp_nearbyint_nxv16f16( %va, @llvm.vp.nearbyint.nxv32f16(, < define @vp_nearbyint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: frflags a0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -458,7 +458,7 @@ define @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 -; ZVFHMIN-NEXT: vsetvli 
zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a2 @@ -701,8 +700,8 @@ define @vp_nearbyint_nxv4f32( %va, @vp_nearbyint_nxv8f32( %va, @vp_nearbyint_nxv16f32( %va, < ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -867,16 +866,16 @@ declare @llvm.vp.nearbyint.nxv2f64(, define @vp_nearbyint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -911,16 +910,16 @@ declare @llvm.vp.nearbyint.nxv4f64(, define @vp_nearbyint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -955,16 +954,16 @@ declare @llvm.vp.nearbyint.nxv7f64(, define @vp_nearbyint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -999,16 +998,16 @@ declare @llvm.vp.nearbyint.nxv8f64(, define @vp_nearbyint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; 
CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -1044,62 +1043,47 @@ declare @llvm.vp.nearbyint.nxv16f64( @vp_nearbyint_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vfabs.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll index d13d67fd0a882..8bb62eaa8e9e9 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
@@ -27,20 +27,18 @@ define <4 x float> @foo(ptr %0) nounwind {
; CHECK-NEXT: fsw fa0, 0(sp)
; CHECK-NEXT: addi a0, sp, 4
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: addi a0, sp, 8
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vle32.v v11, (a0)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vslideup.vi v10, v9, 1
+; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index f934127f978dc..88bd92c6ec161 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -185,8 +185,8 @@ define @vp_rint_nxv4f16( %va, @llvm.vp.rint.nxv8f16(, @vp_rint_nxv8f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vmv1r.v v0, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -263,8 +263,8 @@ define @vp_rint_nxv8f16( %va, @llvm.vp.rint.nxv16f16(, @vp_rint_nxv16f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT: vmv1r.v v0, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -341,8 +341,8 @@ define @vp_rint_nxv16f16( %va, @llvm.vp.rint.nxv32f16(, @vp_rint_nxv32f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t
-; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; ZVFH-NEXT: vmv1r.v v0, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t
; ZVFH-NEXT: vsetvli zero,
zero, e16, m8, ta, mu @@ -426,46 +426,50 @@ define @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -640,8 +644,8 @@ define @vp_rint_nxv4f32( %va, @vp_rint_nxv8f32( %va, @vp_rint_nxv16f32( %va, @llvm.vp.rint.nxv2f64(, @vp_rint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -831,15 +835,15 @@ declare @llvm.vp.rint.nxv4f64(, @vp_rint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -871,15 +875,15 @@ declare @llvm.vp.rint.nxv7f64(, @vp_rint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -911,15 +915,15 @@ declare @llvm.vp.rint.nxv8f64(, @vp_rint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; 
CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -955,62 +959,51 @@ define @vp_rint_nxv16f64( %va, @vp_round_nxv4f16( %va, @llvm.vp.round.nxv8f16(, @vp_round_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define @vp_round_nxv8f16( %va, @llvm.vp.round.nxv16f16(, @vp_round_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -376,8 +376,8 @@ define @vp_round_nxv16f16( %va, @llvm.vp.round.nxv32f16(, @vp_round_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -458,7 +458,6 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; 
ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -701,8 +708,8 @@ define @vp_round_nxv4f32( %va, @vp_round_nxv8f32( %va, @vp_round_nxv16f32( %va, @llvm.vp.round.nxv2f64(, @vp_round_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -911,16 +918,16 @@ declare @llvm.vp.round.nxv4f64(, @vp_round_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -955,16 +962,16 @@ declare @llvm.vp.round.nxv7f64(, @vp_round_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -999,16 +1006,16 @@ declare @llvm.vp.round.nxv8f64(, @vp_round_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1047,66 +1054,56 @@ define @vp_round_nxv16f64( %va, @vp_roundeven_nxv4f16( %va, @llvm.vp.roundeven.nxv8f16(, @vp_roundeven_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, 
%hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define @vp_roundeven_nxv8f16( %va, @llvm.vp.roundeven.nxv16f16(, < define @vp_roundeven_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -376,8 +376,8 @@ define @vp_roundeven_nxv16f16( %va, @llvm.vp.roundeven.nxv32f16(, < define @vp_roundeven_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -458,7 +458,6 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -701,8 +708,8 @@ define @vp_roundeven_nxv4f32( %va, @vp_roundeven_nxv8f32( %va, @vp_roundeven_nxv16f32( %va, < ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -867,16 +874,16 @@ declare @llvm.vp.roundeven.nxv2f64(, define @vp_roundeven_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -911,16 +918,16 @@ declare @llvm.vp.roundeven.nxv4f64(, define @vp_roundeven_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -955,16 +962,16 @@ declare @llvm.vp.roundeven.nxv7f64(, define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -999,16 +1006,16 @@ declare @llvm.vp.roundeven.nxv8f64(, define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1047,66 +1054,56 @@ define @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, %hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 71a53c525551c..1227e73a02432 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -204,8 +204,8 @@ define @vp_roundtozero_nxv4f16( %va, @llvm.vp.roundtozero.nxv8f16(, @vp_roundtozero_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, 
v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -290,8 +290,8 @@ define @vp_roundtozero_nxv8f16( %va, @llvm.vp.roundtozero.nxv16f16(, define @vp_roundtozero_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI8_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1) +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -376,8 +376,8 @@ define @vp_roundtozero_nxv16f16( %va, < ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v12, v24, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -433,16 +433,16 @@ declare @llvm.vp.roundtozero.nxv32f16(, define @vp_roundtozero_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI10_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1) +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -458,7 +458,6 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v16, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -466,52 +465,60 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vmv1r.v v16, v0 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; 
ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v17 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vmv1r.v v8, v16 ; ZVFHMIN-NEXT: vmv1r.v v0, v16 -; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t ; ZVFHMIN-NEXT: fsrmi a0, 1 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v16 -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -556,20 +563,20 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv4f32( %va, @vp_roundtozero_nxv8f32( %va, @vp_roundtozero_nxv16f32( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -867,16 +874,16 @@ declare @llvm.vp.roundtozero.nxv2f64( define @vp_roundtozero_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, 
ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -911,16 +918,16 @@ declare @llvm.vp.roundtozero.nxv4f64( define @vp_roundtozero_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -955,16 +962,16 @@ declare @llvm.vp.roundtozero.nxv7f64( define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -999,16 +1006,16 @@ declare @llvm.vp.roundtozero.nxv8f64( define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1047,66 +1054,56 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v25, v0, a2 +; CHECK-NEXT: vslidedown.vx v6, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: lui a3, %hi(.LCPI32_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: lui a3, 
%hi(.LCPI32_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index e73415ac0085e..8210ea22a6ee9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -67,13 +67,13 @@ define @foo( %a, @foo( %a, @foo( %a, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv32f64( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vx_nxv128i8( %va, i8 %b, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB97_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -1186,8 +1186,8 @@ define @icmp_eq_vx_swap_nxv128i8( %va, i8 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB98_2: -; CHECK-NEXT: vsetvli 
zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -2257,19 +2257,18 @@ define @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vx_nxv32i32( %va, i32 %b, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB190_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -2344,8 +2344,8 @@ define @icmp_eq_vx_swap_nxv32i32( %va, i32 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB191_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index ab7da9e0faf2b..6e327457bebff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -49,8 +49,8 @@ define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -78,11 +78,11 @@ define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -174,8 +174,8 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -234,10 +234,10 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0) ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v20, (a0) -; CHECK-NEXT: vmv2r.v v16, v10 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv2r.v v20, v10 ; CHECK-NEXT: vmv2r.v v12, v8 -; CHECK-NEXT: vrgather.vv v8, v12, v20 +; CHECK-NEXT: vrgather.vv v8, v12, v16 ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vi v12, v12, 15 ; CHECK-NEXT: lui a0, 16 @@ -245,7 +245,7 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgather.vv v8, v20, v12, v0.t ; CHECK-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -329,18 +329,18 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: v8i32_2: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: vrsub.vi v18, v14, 15 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v18, v10, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v12, v14, 7 +; CHECK-NEXT: vrsub.vi v8, v10, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %v16i32 = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> ret <16 x i32> %v16i32 @@ -492,8 +492,8 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vmv.v.i v0, 15 +; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -638,18 +638,18 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: v8f32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: vrsub.vi v18, v14, 15 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v18, v10, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v12, v14, 7 +; CHECK-NEXT: vrsub.vi v8, v10, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %v16f32 = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> ret <16 x float> %v16f32 diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 8a297db7a3b8f..d1c98f828e76d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -4873,8 +4873,8 @@ define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i ; CHECK-NEXT: .LBB102_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vmseq.vx v0, v10, a1, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v9, (a0), v0.t @@ -4914,8 +4914,8 @@ define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zer ; CHECK-NEXT: .LBB103_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v10, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vf v0, v10, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v9, (a0), v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index b7fe722958bfb..9d0234d2ec2fb 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -33,13 +33,13 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -52,13 +52,13 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -70,14 +70,14 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp @@ -115,13 +115,13 @@ define @vec_nxv4i32( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v16, v14, v10 +; CHECK-NEXT: vmsne.vv v10, v8, v16 +; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv4i32( %x, %y) ret %tmp @@ -134,13 +134,13 @@ define @vec_nxv8i16( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v16, v14, v10 +; CHECK-NEXT: vmsne.vv v10, v8, v16 +; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 -; 
CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv8i16( %x, %y) ret %tmp @@ -152,14 +152,14 @@ define @vec_nxv16i8( %x, ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v14, v12, v10 -; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v16, v14, v10 +; CHECK-NEXT: vmsne.vv v10, v8, v16 +; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv16i8( %x, %y) ret %tmp diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index eff8c26d4d061..b3150ecea6c0b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -562,8 +562,8 @@ define @add_stepvector_nxv16i64() { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vadd.vv v8, v8, v8 @@ -597,8 +597,8 @@ define @mul_stepvector_nxv16i64() { ; RV32-NEXT: slli a1, a0, 1 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: li a0, 3 @@ -686,8 +686,8 @@ define @shl_stepvector_nxv16i64() { ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vsll.vi v8, v8, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 0e2105d5cba86..4d3bced0bcb50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -588,15 +588,15 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, %mask, i32 %evl) @@ -697,10 +697,10 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, %v, ptr %ptr, i32 sig ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: mul a4, a4, a1 -; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: srli a3, a3, 3 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret @@ -567,36 +567,36 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: 
vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t ; CHECK-NEXT: sub a0, a5, a4 ; CHECK-NEXT: sltu t0, a5, a0 ; CHECK-NEXT: addi t0, t0, -1 -; CHECK-NEXT: and a0, t0, a0 -; CHECK-NEXT: mul a7, a7, a2 -; CHECK-NEXT: add a7, a1, a7 -; CHECK-NEXT: srli t0, a4, 3 +; CHECK-NEXT: and t0, t0, a0 +; CHECK-NEXT: mul a0, a7, a2 +; CHECK-NEXT: add a7, a1, a0 +; CHECK-NEXT: srli a0, a4, 3 ; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, t0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a0 ; CHECK-NEXT: sub a0, a3, a6 ; CHECK-NEXT: sltu a3, a3, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t ; CHECK-NEXT: bltu a0, a4, .LBB43_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB43_6: ; CHECK-NEXT: mul a3, a5, a2 -; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: srli a4, a4, 2 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v24, a4 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 4b5e737d22eb8..ede395f4df8e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -580,8 +580,8 @@ define @vadd_vi_nxv128i8( %va, @llvm.vp.add.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1359,8 +1359,8 @@ define @vadd_vi_nxv32i32( %va, @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) @@ -1415,8 +1415,8 @@ define @vadd_vi_nxv32i32_evl_nx8( %va, @vadd_vi_nxv32i32_evl_nx16( %va, < ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %evl = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index 939a45e15c103..6e34d59a2d989 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -255,9 +255,9 @@ define @vfsgnj_vv_nxv32f16( %va, @vfsgnj_vv_nxv32f16( %va, @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll index 6f06d8e570de0..e59a9174b03d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll @@ -44,8 +44,8 @@ define iXLen @intrinsic_vcpop_mask_m_nxv1i1( %0, %0, %0, %0, %0, %0, %0, , <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> 
%vec) { ; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vadd.vv v11, v9, v9 -; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vrgather.vv v9, v10, v11 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vadd.vi v12, v11, -16 ; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t ; CHECK-NEXT: vmsne.vi v9, v9, 0 ; CHECK-NEXT: vadd.vi v12, v11, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 8f4ff37fffb02..f0f847c61f3b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -110,23 +110,22 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re64.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v16, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v0, v16 +; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v16, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -134,34 +133,47 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v24, v8, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vrgather.vv v8, v0, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vadd.vi v8, v16, 1 -; CHECK-NEXT: vrgather.vv v16, v0, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v16, v0, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v24, v0, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size 
Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v28, v8 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v8 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v20, v8 ; CHECK-NEXT: vmv8r.v v8, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 7797577362c93..bcb008857ad32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -8,14 +8,15 @@ define {, } @vector_deinterleave_nxv16i1_nxv ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a0 +; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 ; CHECK-NEXT: vnsrl.wi v10, v12, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 @@ -90,25 +91,38 @@ declare {, } @llvm.vector.deinterleave2.nxv4 define {, } @vector_deinterleave_nxv64i1_nxv128i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v8 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 +; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vnsrl.wi v12, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v7, v24, 0 +; CHECK-NEXT: vmsne.vi v7, v8, 0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 8 -; CHECK-NEXT: vnsrl.wi v28, v8, 8 +; CHECK-NEXT: vnsrl.wi v0, v16, 8 +; CHECK-NEXT: vnsrl.wi v4, v24, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v9, v24, 0 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmsne.vi v9, v0, 0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v8, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv128i1( %vec) ret {, } %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index 7ade47e60bc69..5ebf63f0a4411 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -107,14 +107,14 @@ define void @vector_interleave_store_nxv16i64_nxv8i64( %a, %a, @vector_interleave_nxv4i64_nxv2i64( ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vand.vi v13, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v13, 0 ; CHECK-NEXT: vsrl.vi v16, v12, 1 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -137,9 +137,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; ZVBB-NEXT: vid.v v12 +; ZVBB-NEXT: vand.vi v13, v12, 1 +; ZVBB-NEXT: vmsne.vi v0, v13, 0 ; ZVBB-NEXT: vsrl.vi v16, v12, 1 -; ZVBB-NEXT: vand.vi v12, v12, 1 -; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t ; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16 @@ -269,13 +269,13 @@ define @vector_interleave_nxv32i32_nxv16i32( @llvm.vector.interleave2.nxv32i32( %a, %b) @@ -288,44 +288,32 @@ define @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv32f32_nxv16f32( @llvm.vector.interleave2.nxv32f32( %a, %b) @@ -612,44 +588,32 @@ define @vector_interleave_nxv16f64_nxv8f64( @vector_interleave_nxv16f64_nxv8f64( @splice_nxv32i1_offset_max( %a, @splice_nxv64i1_offset_max( %a, @vfabs_vv_nxv32f16( %va, @vfabs_vv_nxv16f64( %va, @llvm.vp.fabs.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index c69a7bc5cece4..ad7fb63fec2fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -579,9 +579,9 @@ define @vfadd_vv_nxv32f16( %va, @vfadd_vv_nxv32f16( %va, @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -675,12 +675,20 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfadd_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; 
ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -692,7 +700,9 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -702,11 +712,15 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB24_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 3ad17e85570a2..81d844d1950ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -525,9 +525,9 @@ define @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -621,12 +621,20 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -638,7 +646,9 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; 
ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -648,11 +658,15 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll index 3be3f835f3d11..eafd605c6110e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll @@ -44,8 +44,8 @@ define iXLen @intrinsic_vfirst_mask_m_nxv1i1( %0, %0, %0, %0, %0, %0, %0, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8re64.v v16, (a3) ; CHECK-NEXT: sub a3, a4, a1 ; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24 ; CHECK-NEXT: bltu a4, a1, .LBB93_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB93_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -1259,6 +1258,7 @@ define @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll index d4ba0f8c90733..c15b875e8f0c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll @@ -239,15 +239,15 @@ define @vfmadd_vv_nxv16f16( %va, @vfmadd_vv_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmadd_vv_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll index 755c665376128..e928df85b5bb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll @@ -255,9 +255,9 @@ define @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll index 5114f0a8d1d65..c835dc72268b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll @@ -347,75 +347,64 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vf_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -621,12 +621,20 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfmul_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -638,7 +646,9 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -648,11 +658,15 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, 
a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index bacf9bae83ed7..abda6750e5a8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1134,12 +1134,10 @@ define @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8re64.v v16, (a3) ; CHECK-NEXT: sub a3, a4, a1 ; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24 ; CHECK-NEXT: bltu a4, a1, .LBB93_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB93_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -1259,6 +1258,7 @@ define @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll index 1db5fa1720a27..69ea7ce33cf6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -250,8 +250,8 @@ define @vfneg_vv_nxv32f16( %va, @vfneg_vv_nxv16f64( %va, @llvm.vp.fneg.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll index 785f60ad1d39c..b54590cd9d844 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll @@ -412,85 +412,85 @@ define @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfpext_nxv32f16_nxv32f32( %a, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll index 15c4bf255e6dc..f3544589407d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -394,7 +394,15 @@ declare @llvm.vp.fptosi.nxv32i16.nxv32f32( @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub 
sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -404,16 +412,22 @@ define @vfptosi_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.rtz.x.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v @@ -440,8 +454,8 @@ define @vfptosi_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i32.nxv32f32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll index a2591e7dc35f0..9fd2d8edb2203 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -394,7 +394,15 @@ declare @llvm.vp.fptoui.nxv32i16.nxv32f32( @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -404,16 +412,22 @@ define @vfptoui_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v28, v16, v0.t +; CHECK-NEXT: vfncvt.rtz.xu.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; 
CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v @@ -440,8 +454,8 @@ define @vfptoui_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i32.nxv32f32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index 0c3abe37af27a..d2219cf963596 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -113,16 +113,16 @@ define @vfptrunc_nxv16f32_nxv16f64( ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb @@ -169,11 +169,11 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: sub a6, a5, a1 ; CHECK-NEXT: sltu a7, a5, a6 ; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a6, a7, a6 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma ; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: and a0, a7, a6 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: bltu a5, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: @@ -181,8 +181,8 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t ; CHECK-NEXT: bltu a2, a4, .LBB8_4 ; CHECK-NEXT: # %bb.3: @@ -192,22 +192,22 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB8_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index d6caad15e40a2..bd229e0220a4b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -250,8 +250,8 @@ define @vfsqrt_vv_nxv32f16( %va, @vfsqrt_vv_nxv16f64( %va, @llvm.vp.sqrt.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 2eae18d7cc493..c833f8048fe32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -525,9 +525,9 @@ define @vfsub_vv_nxv32f16( %va, @vfsub_vv_nxv32f16( %va, @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -621,12 +621,20 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfsub_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -638,7 +646,9 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -648,11 +658,15 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll index 78f3792dbaf06..f9d992a40299c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll @@ -671,9 +671,9 @@ define @vfmacc_vv_nxv16f32( %a, @vfnmacc_vv_nxv16f32( %a, @vfnmacc_vf_nxv16f32_commute( % ; ; ZVFHMIN-LABEL: 
vfnmacc_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv4r.v v24, v8 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v8, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfnmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfnmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll index 2797ca2eb3163..0ad7be47bcc8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll @@ -601,9 +601,9 @@ define @vfnmsac_vv_nxv16f32( %a, @vfnmsac_vf_nxv16f32_commute( % ; ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv4r.v v24, v8 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v8, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfnmsub.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vfnmsub.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll index 5d0172430d15c..77ef0a340270f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll @@ -449,12 +449,12 @@ define @vsitofp_nxv32i1_nxv32f16( %va) { ; ZVFHMIN-NEXT: vmv.v.i v12, 0 ; ZVFHMIN-NEXT: vmerge.vim v8, v12, -1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.x.v v16, v8 -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: vmerge.vim v12, v12, -1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.x.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 @@ -478,12 +478,12 @@ define @vuitofp_nxv32i1_nxv32f16( %va) { ; ZVFHMIN-NEXT: vmv.v.i v12, 0 ; ZVFHMIN-NEXT: vmerge.vim v8, v12, 1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.xu.v v16, v8 -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: vmerge.vim v12, v12, 1, v0 ; ZVFHMIN-NEXT: vfwcvt.f.xu.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index a35fc874065a7..8a76467986620 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -423,8 +423,8 @@ define @vmax_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -986,8 +986,8 @@ define @vmax_vx_nxv32i32( %va, i32 %b, poison, i32 %b, i32 0 @@ -1046,8 +1046,8 @@ define @vmax_vx_nxv32i32_evl_nx8( %va, i3 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1084,8 +1084,8 @@ define @vmax_vx_nxv32i32_evl_nx16( %va, i ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vmax.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmax.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 1f620a44dbbc8..1c74887c1b20f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -425,8 +425,8 @@ define @vmaxu_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -988,8 +988,8 @@ define @vmaxu_vx_nxv32i32( %va, i32 %b, < ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB80_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1048,8 +1048,8 @@ define @vmaxu_vx_nxv32i32_evl_nx8( %va, i ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1086,8 +1086,8 @@ define @vmaxu_vx_nxv32i32_evl_nx16( %va, ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll index e7184921d87a0..2e5b67c93fce1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfeq.mask.nxv1f16( define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16( define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16( define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfeq.mask.nxv8f16( define @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfeq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfeq.mask.nxv16f16( define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfeq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfeq.mask.nxv1f32( define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfeq.mask.nxv2f32( define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfeq.mask.nxv4f32( define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, 
v12, v0.t +; CHECK-NEXT: vmfeq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfeq.mask.nxv8f32( define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfeq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64( define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64( define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfeq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64( define @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfeq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfeq_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfeq_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfeq_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfeq_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfeq_mask_vf_nxv16f16_f16( @intrinsic_vmfeq_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfeq_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfeq_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfeq_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfeq_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfeq_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfeq_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index 
a6dad9eaa4f35..b5ca47707c8a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfge.mask.nxv1f16( define @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfge.mask.nxv2f16( define @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfge.mask.nxv4f16( define @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfge.mask.nxv8f16( define @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmfle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfge.mask.nxv16f16( define @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmfle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfge.mask.nxv1f32( define @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare 
@llvm.riscv.vmfge.mask.nxv2f32( define @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfge.mask.nxv4f32( define @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmfle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfge.mask.nxv8f32( define @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmfle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfge.mask.nxv1f64( define @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfge.mask.nxv2f64( define @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmfle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfge.mask.nxv4f64( define @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmfle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f64( @@ -647,8 +659,8 @@ 
define @intrinsic_vmfge_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfge_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfge_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfge_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfge_mask_vf_nxv16f16_f16( @intrinsic_vmfge_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfge_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfge_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfge_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfge_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ 
-1117,8 +1129,8 @@ define @intrinsic_vmfge_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfge_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index f643a4036381c..971249d38d1b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16( define @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16( define @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16( define @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16( define @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmflt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16( define @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv 
v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32( define @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32( define @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32( define @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmflt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32( define @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64( define @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64( define @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmflt.vv v14, 
v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64( define @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmflt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfgt_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfgt_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfgt_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfgt_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfgt_mask_vf_nxv16f16_f16( @intrinsic_vmfgt_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfgt_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfgt_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfgt.vf v11, 
v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfgt_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfgt_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfgt_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfgt_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index 6c52364c1fbd5..f19a181a365af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfle.mask.nxv1f16( define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfle.mask.nxv2f16( define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfle.mask.nxv4f16( define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfle.mask.nxv8f16( define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( 
%0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfle.mask.nxv16f16( define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfle.mask.nxv1f32( define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfle.mask.nxv2f32( define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfle.mask.nxv4f32( define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfle.mask.nxv8f32( define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfle.mask.nxv1f64( define 
@intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfle.mask.nxv2f64( define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfle.mask.nxv4f64( define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfle_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfle_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfle_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfle_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfle_mask_vf_nxv16f16_f16( @intrinsic_vmfle_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfle_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfle_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfle_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfle_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfle_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfle_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 37a9c6b081a1d..0a04642219334 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmflt.mask.nxv1f16( define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmflt.mask.nxv2f16( define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli 
zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmflt.mask.nxv4f16( define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmflt.mask.nxv8f16( define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmflt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmflt.mask.nxv16f16( define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmflt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmflt.mask.nxv1f32( define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmflt.mask.nxv2f32( define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmflt.mask.nxv4f32( define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmflt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; 
CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmflt.mask.nxv8f32( define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmflt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmflt.mask.nxv1f64( define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmflt.mask.nxv2f64( define @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmflt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmflt.mask.nxv4f64( define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmflt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmflt_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmflt_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmflt_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmflt_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmflt_mask_vf_nxv16f16_f16( @intrinsic_vmflt_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmflt_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmflt_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmflt_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmflt_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmflt_mask_vf_nxv2f64_f64( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmflt_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll index 5defce42091e5..520099247e0f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmfne.mask.nxv1f16( define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmfne.mask.nxv2f16( define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmfne.mask.nxv4f16( define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -187,11 +190,12 @@ declare @llvm.riscv.vmfne.mask.nxv8f16( define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f16( @@ -238,11 +242,12 @@ declare @llvm.riscv.vmfne.mask.nxv16f16( define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv16f16( @@ -289,9 +294,10 @@ declare @llvm.riscv.vmfne.mask.nxv1f32( define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -340,9 +346,10 @@ declare @llvm.riscv.vmfne.mask.nxv2f32( define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -391,11 +398,12 @@ declare @llvm.riscv.vmfne.mask.nxv4f32( define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f32( @@ -442,11 +450,12 @@ declare @llvm.riscv.vmfne.mask.nxv8f32( define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f32( @@ -493,9 +502,10 @@ declare @llvm.riscv.vmfne.mask.nxv1f64( define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -544,11 +554,12 @@ declare @llvm.riscv.vmfne.mask.nxv2f64( define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmfne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv2f64( @@ -595,11 +606,12 @@ declare @llvm.riscv.vmfne.mask.nxv4f64( define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmfne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f64( @@ -647,8 +659,8 @@ define @intrinsic_vmfne_mask_vf_nxv1f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -694,8 +706,8 @@ define @intrinsic_vmfne_mask_vf_nxv2f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -741,8 +753,8 @@ define @intrinsic_vmfne_mask_vf_nxv4f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -788,8 +800,8 @@ define @intrinsic_vmfne_mask_vf_nxv8f16_f16( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -835,8 +847,8 @@ define @intrinsic_vmfne_mask_vf_nxv16f16_f16( @intrinsic_vmfne_mask_vf_nxv1f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -929,8 +941,8 @@ define @intrinsic_vmfne_mask_vf_nxv2f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -976,8 +988,8 @@ define @intrinsic_vmfne_mask_vf_nxv4f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1023,8 +1035,8 @@ define @intrinsic_vmfne_mask_vf_nxv8f32_f32( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1070,8 +1082,8 @@ define @intrinsic_vmfne_mask_vf_nxv1f64_f64( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1117,8 +1129,8 @@ define @intrinsic_vmfne_mask_vf_nxv2f64_f64( ; CHECK-LABEL: 
intrinsic_vmfne_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1164,8 +1176,8 @@ define @intrinsic_vmfne_mask_vf_nxv4f64_f64( ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 8fabf93356aeb..1c71242c3c7d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -423,8 +423,8 @@ define @vmin_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -986,8 +986,8 @@ define @vmin_vx_nxv32i32( %va, i32 %b, poison, i32 %b, i32 0 @@ -1046,8 +1046,8 @@ define @vmin_vx_nxv32i32_evl_nx8( %va, i3 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1084,8 +1084,8 @@ define @vmin_vx_nxv32i32_evl_nx16( %va, i ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vmin.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmin.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 8ec85e545a0f8..6d89a9777cf91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -425,8 +425,8 @@ define @vminu_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -988,8 +988,8 @@ define @vminu_vx_nxv32i32( %va, i32 %b, < ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB80_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1048,8 +1048,8 @@ define @vminu_vx_nxv32i32_evl_nx8( %va, i ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1086,8 +1086,8 @@ define @vminu_vx_nxv32i32_evl_nx16( %va, ; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vminu.vx v8, v8, a0, v0.t -; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vminu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll index 2d6e958fcd0ba..14a1f084c3985 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll @@ -32,8 +32,8 @@ 
define @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -74,8 +74,8 @@ define @intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define @intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define @intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1( ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmsbf.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ define @intrinsic_vmsbf_mask_m_nxv16i1_nxv16i1( @intrinsic_vmsbf_mask_m_nxv32i1_nxv32i1( @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1( @llvm.riscv.vmseq.mask.nxv1i8( define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmseq.mask.nxv2i8( define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmseq.mask.nxv4i8( define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmseq.mask.nxv8i8( define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, 
v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmseq.mask.nxv16i8( define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmseq.mask.nxv32i8( define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmseq.mask.nxv1i16( define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmseq.mask.nxv2i16( define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmseq.mask.nxv4i16( define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmseq.mask.nxv8i16( define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, 
v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmseq.mask.nxv16i16( define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmseq.mask.nxv1i32( define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmseq.mask.nxv2i32( define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmseq.mask.nxv4i32( define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmseq.mask.nxv8i32( define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmseq.mask.nxv1i64( define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmseq.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 
; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmseq.mask.nxv2i64( define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmseq.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmseq.mask.nxv4i64( define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmseq.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmseq_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmseq_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmseq_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmseq.vx v13, v8, a0, 
v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmseq_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmseq_mask_vx_nxv16i16_i16( @intrinsic_vmseq_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmseq_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: 
vmseq.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmseq_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmseq.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmseq_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmseq.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmseq_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmseq_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmseq_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmseq_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmseq_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; 
CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmseq_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmseq_mask_vi_nxv16i16_i16( @intrinsic_vmseq_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmseq_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmseq_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmseq_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t ; CHECK-NEXT: 
vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmseq_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index c8f9b60a3f2da..75fc407abbc2f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i8( define @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsge.mask.nxv2i8( define @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsge.mask.nxv4i8( define @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsge.mask.nxv8i8( define @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsge.mask.nxv16i8( define @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsge.mask.nxv32i8( define @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i16( define @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsge.mask.nxv2i16( define @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsge.mask.nxv4i16( define @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsge.mask.nxv8i16( define @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsge.mask.nxv16i16( define @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i32( define @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsge.mask.nxv2i32( define @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsge.mask.nxv4i32( define @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsge.mask.nxv8i32( define @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsge.mask.nxv1i64( define @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsge.mask.nxv2i64( define @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v10, v8 -; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsle.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsge.mask.nxv4i64( define @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv 
v0, v12, v8 -; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsle.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i64( @@ -954,8 +972,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1002,8 +1020,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1050,8 +1068,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1098,8 +1116,8 @@ define @intrinsic_vmsge_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1146,8 +1164,8 @@ define @intrinsic_vmsge_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1194,8 +1212,8 @@ define @intrinsic_vmsge_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v13, v12 ; CHECK-NEXT: ret @@ -1242,8 +1260,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1290,8 +1308,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1338,8 +1356,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i16_i16( ; CHECK-LABEL: 
intrinsic_vmsge_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1386,8 +1404,8 @@ define @intrinsic_vmsge_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1434,8 +1452,8 @@ define @intrinsic_vmsge_mask_vx_nxv16i16_i16( @intrinsic_vmsge_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1530,8 +1548,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1578,8 +1596,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1626,8 +1644,8 @@ define @intrinsic_vmsge_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v13, v12 ; CHECK-NEXT: ret @@ -1701,8 +1719,8 @@ define @intrinsic_vmsge_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmslt.vx v10, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v10, v9 ; RV64-NEXT: ret @@ -1776,8 +1794,8 @@ define @intrinsic_vmsge_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmslt.vx v11, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v11, v10 ; RV64-NEXT: ret @@ -1851,8 +1869,8 @@ define @intrinsic_vmsge_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmslt.vx v13, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v13, v12 ; RV64-NEXT: ret @@ -1886,8 +1904,8 @@ define 
@intrinsic_vmsge_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1921,8 +1939,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1956,8 +1974,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1991,8 +2009,8 @@ define @intrinsic_vmsge_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2026,8 +2044,8 @@ define @intrinsic_vmsge_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2061,8 +2079,8 @@ define @intrinsic_vmsge_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2096,8 +2114,8 @@ define @intrinsic_vmsge_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2131,8 +2149,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2166,8 +2184,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2201,8 +2219,8 @@ define 
@intrinsic_vmsge_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2236,8 +2254,8 @@ define @intrinsic_vmsge_mask_vi_nxv16i16_i16( @intrinsic_vmsge_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2306,8 +2324,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2341,8 +2359,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2376,8 +2394,8 @@ define @intrinsic_vmsge_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2411,8 +2429,8 @@ define @intrinsic_vmsge_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2446,8 +2464,8 @@ define @intrinsic_vmsge_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2481,8 +2499,8 @@ define @intrinsic_vmsge_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll index b6c6d9e90f610..5568c1e9b1cfb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8( define @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8( define @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8( define @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8( define @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8( define @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv32i8( define @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16( define @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: 
vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16( define @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16( define @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16( define @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16( define @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32( define @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32( define @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: 
vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32( define @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32( define @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i64( define @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 +; CHECK-NEXT: vmsleu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i64( define @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsleu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i64( define @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v12, v8 -; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsleu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i64( @@ -954,8 +972,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1002,8 
+1020,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1050,8 +1068,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1098,8 +1116,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1146,8 +1164,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv16i8_i8( @intrinsic_vmsgeu_mask_vx_nxv32i8_i8( @intrinsic_vmsgeu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1290,8 +1308,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1338,8 +1356,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1386,8 +1404,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1434,8 +1452,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv16i16_i16( @intrinsic_vmsgeu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1530,8 +1548,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v 
v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret @@ -1578,8 +1596,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v11, v10 ; CHECK-NEXT: ret @@ -1626,8 +1644,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmxor.mm v0, v13, v12 ; CHECK-NEXT: ret @@ -1701,8 +1719,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsltu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v10, v9 ; RV64-NEXT: ret @@ -1776,8 +1794,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v11, v10 ; RV64-NEXT: ret @@ -1851,8 +1869,8 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmxor.mm v0, v13, v12 ; RV64-NEXT: ret @@ -1886,8 +1904,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1921,8 +1939,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1956,8 +1974,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1991,8 +2009,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: 
vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2026,8 +2044,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv16i8_i8( @intrinsic_vmsgeu_mask_vi_nxv32i8_i8( @intrinsic_vmsgeu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2178,8 +2196,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2213,8 +2231,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2248,8 +2266,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv16i16_i16( @intrinsic_vmsgeu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2318,8 +2336,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2353,8 +2371,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2388,8 +2406,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2423,8 +2441,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 14, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2458,8 +2476,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i64_i64: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2493,8 +2511,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll index dfd7096a65ebb..f1fa6484d976b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8( define @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8( define @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8( define @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8( define @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8( define @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; 
CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8( define @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16( define @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16( define @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16( define @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsgt.mask.nxv8i16( define @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16( define @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i16( @@ -595,9 +606,10 
@@ declare @llvm.riscv.vmsgt.mask.nxv1i32( define @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsgt.mask.nxv2i32( define @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsgt.mask.nxv4i32( define @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32( define @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsgt.mask.nxv1i64( define @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v9, v8 +; CHECK-NEXT: vmslt.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsgt.mask.nxv2i64( define @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmslt.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsgt.mask.nxv4i64( define 
@intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmslt.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsgt_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsgt_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmsgt_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; 
CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsgt_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsgt_mask_vx_nxv16i16_i16( @intrinsic_vmsgt_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsgt_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsgt.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsgt.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; 
RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsgt.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsgt_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsgt_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsgt_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmsgt_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmsgt_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu 
; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsgt_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsgt_mask_vi_nxv16i16_i16( @intrinsic_vmsgt_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsgt_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsgt_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgt.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsgt_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsgt_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index 8826be03bbebb..de7a0ad87be27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8( define 
@intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8( define @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8( define @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8( define @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8( define @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8( define @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16( define @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16( define @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16( define @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16( define @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16( define @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32( define @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32( define @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: 
vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32( define @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32( define @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i64( define @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v9, v8 +; CHECK-NEXT: vmsltu.vv v8, v9, v8 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i64( define @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmsltu.vv v14, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i64( define @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmsltu.vv v20, v12, v8 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; 
CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv16i8_i8( @intrinsic_vmsgtu_mask_vx_nxv32i8_i8( @intrinsic_vmsgtu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv16i16_i16( @intrinsic_vmsgtu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; 
CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsgtu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsgtu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsgtu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, 
m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv16i8_i8( @intrinsic_vmsgtu_mask_vi_nxv32i8_i8( @intrinsic_vmsgtu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv16i16_i16( @intrinsic_vmsgtu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: 
intrinsic_vmsgtu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsgtu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsgtu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll index 8ce9a3020b7a5..05d402afc934c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll @@ -32,8 +32,8 @@ define @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -74,8 +74,8 @@ define @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1( ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmsif.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ define @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1( @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1( @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1( @llvm.riscv.vmsle.mask.nxv1i8( define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare 
@llvm.riscv.vmsle.mask.nxv2i8( define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsle.mask.nxv4i8( define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsle.mask.nxv8i8( define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsle.mask.nxv16i8( define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsle.mask.nxv32i8( define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsle.mask.nxv1i16( define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsle.mask.nxv2i16( define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsle.mask.nxv4i16( define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsle.mask.nxv8i16( define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsle.mask.nxv16i16( define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsle.mask.nxv1i32( define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsle.mask.nxv2i32( define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsle.mask.nxv4i32( define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: 
vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsle.mask.nxv8i32( define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsle.mask.nxv1i64( define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsle.mask.nxv2i64( define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsle.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsle.mask.nxv4i64( define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsle.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsle_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsle_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmsle_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsle_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsle_mask_vx_nxv16i16_i16( @intrinsic_vmsle_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsle_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsle.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsle_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsle.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsle_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsle.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsle_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsle_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: 
vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsle_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmsle_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmsle_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsle_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsle_mask_vi_nxv16i16_i16( @intrinsic_vmsle_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsle_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsle_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsle_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsle_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index c58ac2d071831..540577247484e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8( define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8( define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8( define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; 
CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8( define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8( define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8( define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16( define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16( define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16( define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16( define 
@intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16( define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32( define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32( define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32( define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32( define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i32( @@ -799,9 
+814,10 @@ declare @llvm.riscv.vmsleu.mask.nxv1i64( define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v9 +; CHECK-NEXT: vmsleu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsleu.mask.nxv2i64( define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsleu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsleu.mask.nxv4i64( define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsleu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsleu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsleu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsleu_mask_vx_nxv16i8_i8( @intrinsic_vmsleu_mask_vx_nxv32i8_i8( @intrinsic_vmsleu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: 
intrinsic_vmsleu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsleu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsleu_mask_vx_nxv16i16_i16( @intrinsic_vmsleu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsleu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsleu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsleu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define 
@intrinsic_vmsleu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsleu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsleu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsleu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsleu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsleu_mask_vi_nxv16i8_i8( @intrinsic_vmsleu_mask_vi_nxv32i8_i8( @intrinsic_vmsleu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 
; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsleu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsleu_mask_vi_nxv16i16_i16( @intrinsic_vmsleu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsleu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsleu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsleu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsleu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 6c6e580b043d1..554d25172d4fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i8( define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmslt.mask.nxv2i8( define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmslt.mask.nxv4i8( define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmslt.mask.nxv8i8( define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmslt.mask.nxv16i8( define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmslt.mask.nxv32i8( define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i16( define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: 
vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmslt.mask.nxv2i16( define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmslt.mask.nxv4i16( define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmslt.mask.nxv8i16( define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmslt.mask.nxv16i16( define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i32( define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmslt.mask.nxv2i32( define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: 
vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmslt.mask.nxv4i32( define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmslt.mask.nxv8i32( define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmslt.mask.nxv1i64( define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v9 +; CHECK-NEXT: vmslt.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmslt.mask.nxv2i64( define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmslt.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmslt.mask.nxv4i64( define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmslt.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: 
intrinsic_vmslt_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmslt_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmslt_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmslt_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmslt_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmslt_mask_vx_nxv16i16_i16( @intrinsic_vmslt_mask_vx_nxv1i32_i32( ; 
CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmslt_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmslt.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmslt_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmslt.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmslt_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmslt.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: 
intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmslt_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmslt_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmslt_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vx v10, v8, zero, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmslt_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmslt_mask_vi_nxv16i16_i16( @intrinsic_vmslt_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i32_i32( ; 
CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmslt_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmslt_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsle.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmslt_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vi v11, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmslt_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vi v13, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index 76f3e449ab58f..7a8efa6c80fb6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8( define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8( define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; 
CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8( define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8( define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8( define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8( define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16( define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16( define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsltu.mask.nxv4i16( define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16( define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16( define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32( define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32( define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsltu.mask.nxv4i32( define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32( define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsltu.mask.nxv1i64( define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v9 +; CHECK-NEXT: vmsltu.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsltu.mask.nxv2i64( define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsltu.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsltu.mask.nxv4i64( define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsltu.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i64( @@ -953,8 +971,8 @@ define @intrinsic_vmsltu_mask_vx_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsltu_mask_vx_nxv8i8_i8( % ; CHECK-LABEL: 
intrinsic_vmsltu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsltu_mask_vx_nxv16i8_i8( @intrinsic_vmsltu_mask_vx_nxv32i8_i8( @intrinsic_vmsltu_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define @intrinsic_vmsltu_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsltu_mask_vx_nxv16i16_i16( @intrinsic_vmsltu_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsltu_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t ; 
CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsltu.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsltu_mask_vi_nxv1i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( % ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( @intrinsic_vmsltu_mask_vi_nxv32i8_i8( @intrinsic_vmsltu_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, 
ta, mu ; CHECK-NEXT: vmsltu.vx v10, v8, zero, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( @intrinsic_vmsltu_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsltu_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsleu.vi v10, v8, 14, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsltu_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vi v11, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsltu_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, 
v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsleu.vi v13, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index 161c1bc4314fc..bd6bd8a804bcc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -34,9 +34,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i8( define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -85,9 +86,10 @@ declare @llvm.riscv.vmsne.mask.nxv2i8( define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -136,9 +138,10 @@ declare @llvm.riscv.vmsne.mask.nxv4i8( define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -187,9 +190,10 @@ declare @llvm.riscv.vmsne.mask.nxv8i8( define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -238,11 +242,12 @@ declare @llvm.riscv.vmsne.mask.nxv16i8( define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i8( @@ -289,11 +294,12 @@ declare @llvm.riscv.vmsne.mask.nxv32i8( define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: 
vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv32i8( @@ -340,9 +346,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i16( define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -391,9 +398,10 @@ declare @llvm.riscv.vmsne.mask.nxv2i16( define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -442,9 +450,10 @@ declare @llvm.riscv.vmsne.mask.nxv4i16( define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -493,11 +502,12 @@ declare @llvm.riscv.vmsne.mask.nxv8i16( define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i16( @@ -544,11 +554,12 @@ declare @llvm.riscv.vmsne.mask.nxv16i16( define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i16( @@ -595,9 +606,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i32( define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -646,9 +658,10 @@ 
declare @llvm.riscv.vmsne.mask.nxv2i32( define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -697,11 +710,12 @@ declare @llvm.riscv.vmsne.mask.nxv4i32( define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i32( @@ -748,11 +762,12 @@ declare @llvm.riscv.vmsne.mask.nxv8i32( define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i32( @@ -799,9 +814,10 @@ declare @llvm.riscv.vmsne.mask.nxv1i64( define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -850,11 +866,12 @@ declare @llvm.riscv.vmsne.mask.nxv2i64( define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmsne.vv v14, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv2i64( @@ -901,11 +918,12 @@ declare @llvm.riscv.vmsne.mask.nxv4i64( define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmsne.vv v20, v8, v12 +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i64( @@ -953,8 
+971,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1000,8 +1018,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1047,8 +1065,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1094,8 +1112,8 @@ define @intrinsic_vmsne_mask_vx_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1141,8 +1159,8 @@ define @intrinsic_vmsne_mask_vx_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1188,8 +1206,8 @@ define @intrinsic_vmsne_mask_vx_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1235,8 +1253,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1282,8 +1300,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1329,8 +1347,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1376,8 +1394,8 @@ define 
@intrinsic_vmsne_mask_vx_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1423,8 +1441,8 @@ define @intrinsic_vmsne_mask_vx_nxv16i16_i16( @intrinsic_vmsne_mask_vx_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1517,8 +1535,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -1564,8 +1582,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -1611,8 +1629,8 @@ define @intrinsic_vmsne_mask_vx_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -1685,8 +1703,8 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmsne.vx v10, v8, a0, v0.t ; RV64-NEXT: vmv.v.v v0, v10 ; RV64-NEXT: ret @@ -1759,8 +1777,8 @@ define @intrinsic_vmsne_mask_vx_nxv2i64_i64( ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v11, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmsne.vx v11, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v11 ; RV64-NEXT: ret @@ -1833,8 +1851,8 @@ define @intrinsic_vmsne_mask_vx_nxv4i64_i64( ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v13, v0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmsne.vx v13, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: ret @@ -1868,8 +1886,8 @@ define @intrinsic_vmsne_mask_vi_nxv1i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1903,8 +1921,8 @@ define 
@intrinsic_vmsne_mask_vi_nxv2i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1938,8 +1956,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -1973,8 +1991,8 @@ define @intrinsic_vmsne_mask_vi_nxv8i8_i8( %0 ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2008,8 +2026,8 @@ define @intrinsic_vmsne_mask_vi_nxv16i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2043,8 +2061,8 @@ define @intrinsic_vmsne_mask_vi_nxv32i8_i8( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2078,8 +2096,8 @@ define @intrinsic_vmsne_mask_vi_nxv1i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2113,8 +2131,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2148,8 +2166,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2183,8 +2201,8 @@ define @intrinsic_vmsne_mask_vi_nxv8i16_i16( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2218,8 +2236,8 @@ define @intrinsic_vmsne_mask_vi_nxv16i16_i16( 
@intrinsic_vmsne_mask_vi_nxv1i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2288,8 +2306,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2323,8 +2341,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2358,8 +2376,8 @@ define @intrinsic_vmsne_mask_vi_nxv8i32_i32( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret @@ -2393,8 +2411,8 @@ define @intrinsic_vmsne_mask_vi_nxv1i64_i64( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsne.vi v10, v8, 9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret @@ -2428,8 +2446,8 @@ define @intrinsic_vmsne_mask_vi_nxv2i64_i64( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -2463,8 +2481,8 @@ define @intrinsic_vmsne_mask_vi_nxv4i64_i64( ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll index f6f90eddcd8c5..0c60681ea8de0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll @@ -32,8 +32,8 @@ define @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -74,8 +74,8 @@ define @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: 
vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1( ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu ; CHECK-NEXT: vmsof.m v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ define @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1( @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1( @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1( %src, %m, i32 %evl) { ; RV32-LABEL: bool_vec: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vfirst.m a1, v9, v0.t ; RV32-NEXT: bltz a1, .LBB0_2 ; RV32-NEXT: # %bb.1: @@ -20,8 +20,8 @@ define iXLen @bool_vec( %src, %m, i32 %evl) { ; RV64-NEXT: vmv1r.v v9, v0 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vfirst.m a1, v9, v0.t ; RV64-NEXT: bltz a1, .LBB0_2 ; RV64-NEXT: # %bb.1: @@ -36,8 +36,8 @@ define iXLen @bool_vec_zero_poison( %src, %m, ; RV32-LABEL: bool_vec_zero_poison: ; RV32: # %bb.0: ; RV32-NEXT: vmv1r.v v9, v0 -; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV32-NEXT: vfirst.m a0, v9, v0.t ; RV32-NEXT: ret ; @@ -46,8 +46,8 @@ define iXLen @bool_vec_zero_poison( %src, %m, ; RV64-NEXT: vmv1r.v v9, v0 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; RV64-NEXT: vfirst.m a0, v9, v0.t ; RV64-NEXT: ret %r = call iXLen @llvm.vp.cttz.elts.iXLen.nxv2i1( %src, i1 1, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll index 8b1660283cb7d..d0f2ce1ca8004 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll @@ -450,14 +450,14 @@ define @test_vp_reverse_nxv64i8_masked( %sr ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a2 +; CHECK-NEXT: vrsub.vx v16, v16, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 +; CHECK-NEXT: vrgatherei16.vv v28, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v24, v12, v16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t +; CHECK-NEXT: vslidedown.vx v8, v24, a1, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, %mask, i32 %evl) ret %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll 
b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll index a30ebf2d33b50..7f81b99eb0338 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -8,8 +8,8 @@ define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -45,8 +45,8 @@ define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -82,8 +82,8 @@ define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -119,8 +119,8 @@ define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index ceb6a164e20df..acf7d16bda982 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -7,8 +7,8 @@ define @test_vp_reverse_nxv1i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -44,8 +44,8 @@ define @test_vp_reverse_nxv2i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -81,8 +81,8 @@ define @test_vp_reverse_nxv4i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -118,8 +118,8 @@ define 
@test_vp_reverse_nxv8i1_masked( %src, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t @@ -155,8 +155,8 @@ define @test_vp_reverse_nxv16i1_masked( %sr ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vid.v v12, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t @@ -193,8 +193,8 @@ define @test_vp_reverse_nxv32i1_masked( %sr ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t @@ -242,8 +242,8 @@ define @test_vp_reverse_nxv64i1_masked( %sr ; CHECK-NEXT: vrgatherei16.vv v16, v28, v0 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v16, a1, v0.t ; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 @@ -263,14 +263,14 @@ define @test_vp_reverse_nxv64i1( %src, i32 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a2 +; CHECK-NEXT: vrsub.vx v16, v16, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 +; CHECK-NEXT: vrgatherei16.vv v28, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v24, v12, v16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v16, a1 +; CHECK-NEXT: vslidedown.vx v8, v24, a1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index ce0ae2022885a..9496cd82947d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -68,8 +68,8 @@ define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t @@ -141,8 +141,8 @@ define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vslideup.vx v10, 
v8, a0, v0.t @@ -214,8 +214,8 @@ define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t @@ -287,8 +287,8 @@ define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 668cff2342936..9027630825227 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -71,8 +71,8 @@ define @test_vp_splice_nxv1i1_masked( %va, @test_vp_splice_nxv2i1_masked( %va, @test_vp_splice_nxv4i1_masked( %va, @test_vp_splice_nxv8i1_masked( %va, @test_vp_splice_nxv16i1_masked( %va, ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v10, v14, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vslideup.vx v10, v12, a0, v0.t @@ -437,8 +437,8 @@ define @test_vp_splice_nxv32i1_masked( %va, ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v16, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vslideup.vx v16, v12, a0, v0.t @@ -511,8 +511,8 @@ define @test_vp_splice_nxv64i1_masked( %va, ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v24, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vslideup.vx v24, v16, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c86fee6305931..c0d7ecf74956b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -274,8 +274,8 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, %idxs @@ -2269,18 +2269,18 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @llvm.vp.gather.nxv16f64.nxv16p0( %ptrs, %m, i32 %evl) @@ -2319,20 +2319,20 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs @@ -2376,20 +2377,20 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, 
-1 -; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: srli a4, a2, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a4 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB104_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB104_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; @@ -2398,25 +2399,26 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: sltu a4, a1, a3 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: srli a4, a2, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a4 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t +; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: bltu a1, a2, .LBB104_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB104_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to @@ -2434,20 +2436,20 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: sub a3, a1, a2 -; RV32-NEXT: sltu a4, a1, a3 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: srli a4, a2, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a4 +; RV32-NEXT: sltu a4, a1, a3 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: bltu a1, a2, .LBB105_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB105_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; @@ -2459,20 +2461,20 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: sltu a4, a1, a3 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: srli a4, a2, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a4 +; RV64-NEXT: sltu a4, a1, a3 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: bltu a1, a2, .LBB105_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB105_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll 
b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index f07c16476c56a..1b1e9153a2fd5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -444,18 +444,18 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: srli a5, a2, 3 ; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: bltu a1, a2, .LBB37_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB37_2: -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret %load = call @llvm.vp.load.nxv16f64.p0(ptr %ptr, %m, i32 %evl) @@ -489,10 +489,10 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpmerge_vv_nxv128i8( %va, @vpmerge_vv_nxv128i8( %va, @vpmerge_vx_nxv128i8(i8 %a, %vb, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %a, i32 0 @@ -442,8 +442,8 @@ define @vpmerge_vi_nxv128i8( %vb, @llvm.vp.merge.nxv128i8( %m, splat (i8 2), %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 351fc500145ea..59662db42898f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2124,10 +2124,10 @@ define void @vpscatter_nxv16f64( %val, ; RV32-NEXT: sub a2, a1, a0 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a0 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret @@ -2157,13 +2157,13 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: sub a0, a2, a1 ; RV64-NEXT: sltu a2, a2, a0 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a0, a2, a0 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -2192,10 +2192,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: sub a3, a2, a1 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2232,11 +2232,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt 
; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 @@ -2244,6 +2243,7 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 @@ -2273,10 +2273,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: sub a3, a2, a1 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2308,22 +2308,22 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB101_2: +; RV64-NEXT: addi a4, sp, 16 +; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 @@ -2355,10 +2355,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: sub a3, a2, a1 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2380,10 +2380,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 +; RV64-NEXT: and a2, a2, a3 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index c12fc0497742a..ce0ee38bc7047 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -380,10 +380,10 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, ) define half @vreduce_fmin_nxv10f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI73_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v12, (a0), zero ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: lui a1, %hi(.LCPI73_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v12, (a1), zero ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 46560fc501c6f..f21b42e9519b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -192,8 +192,8 @@ define half @vpreduce_fadd_nxv64f16(half %s, %v, %v, %v, %v, %v, %v, % ; CHECK-NEXT: vmv.s.x v25, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v25, v16, v25, v0.t ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll index 94ed7e568a01e..39666bb6119a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -24,8 +24,8 @@ define zeroext i1 @vpreduce_or_nxv1i1(i1 zeroext %s, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, < ; CHECK-NEXT: sltu a4, a1, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a3, v8, v0.t ; CHECK-NEXT: snez a3, a3 ; CHECK-NEXT: bltu a1, a2, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB22_2: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a1, v11, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -391,8 +391,8 @@ define zeroext i1 @vpreduce_add_nxv1i1(i1 zeroext %s, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, < ; CHECK-LABEL: vpreduce_smin_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -696,8 +696,8 @@ define zeroext i1 @vpreduce_smin_nxv32i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_smin_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -712,8 +712,8 @@ define zeroext i1 @vpreduce_smin_nxv64i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_smin_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: 
snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -728,8 +728,8 @@ define zeroext i1 @vpreduce_umax_nxv1i1(i1 zeroext %s, %v, %v, %v, %v, %v, < ; CHECK-LABEL: vpreduce_umax_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -808,8 +808,8 @@ define zeroext i1 @vpreduce_umax_nxv32i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_umax_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 @@ -824,8 +824,8 @@ define zeroext i1 @vpreduce_umax_nxv64i1(i1 zeroext %s, %v, < ; CHECK-LABEL: vpreduce_umax_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll index 462d49991ae4f..e95e9fabe9342 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -12,11 +12,11 @@ define internal void @foo( %v15, %0, %vs12.i.i.i, %1, %v37) { ; NOSUBREG-LABEL: foo: ; NOSUBREG: # %bb.0: # %loopIR.preheader.i.i -; NOSUBREG-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; NOSUBREG-NEXT: vmv.v.i v14, 0 -; NOSUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; NOSUBREG-NEXT: vmv.v.i v9, 0 -; NOSUBREG-NEXT: vmv.v.i v8, 0 +; NOSUBREG-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; NOSUBREG-NEXT: vmv.v.i v14, 0 +; NOSUBREG-NEXT: vmv1r.v v8, v9 ; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; NOSUBREG-NEXT: vrgatherei16.vv v8, v9, v14 ; NOSUBREG-NEXT: .LBB0_1: # %loopIR3.i.i @@ -32,11 +32,11 @@ define internal void @foo( %v15, %0, @vsadd_vi_nxv128i8( %va, @llvm.vp.sadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1366,8 +1366,8 @@ define @vsadd_vi_nxv32i32( %va, @llvm.vp.sadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index 745b93b257085..454a4ebab04a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -586,8 +586,8 @@ define @vsaddu_vi_nxv128i8( %va, @llvm.vp.uadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1365,8 +1365,8 @@ define @vsaddu_vi_nxv32i32( %va, @llvm.vp.uadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index 4457c1002acc7..53b8e4a78b756 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -211,12 +211,12 @@ define @vfmerge_fv_nxv32f16( %va, half ; CHECK-ZVFHMIN: # %bb.0: ; CHECK-ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; CHECK-ZVFHMIN-NEXT: vmv.v.v v20, v16 +; 
+; CHECK-ZVFHMIN-NEXT: vmv.v.v v28, v24
 ; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0
 ; CHECK-ZVFHMIN-NEXT: ret
 %head = insertelement poison, half %b, i32 0
 %splat = shufflevector %head, poison, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 312378d393737..ee0617c931480 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -354,11 +354,17 @@ define @select_nxv32i32( %a, @select_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_nxv16f64( %a, @select_nxv16f64( %a, @test8(i64 %avl, i8 zeroext %cond, @vsext_nxv32i8_nxv32i32( %a, @llvm.vp.sitofp.nxv32f16.nxv32i32( @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32:
 ; ZVFH: # %bb.0:
-; ZVFH-NEXT: vmv1r.v v24, v0
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a1, vlenb
+; ZVFH-NEXT: slli a1, a1, 3
+; ZVFH-NEXT: sub sp, sp, a1
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: vmv1r.v v7, v0
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFH-NEXT: csrr a1, vlenb
 ; ZVFH-NEXT: srli a2, a1, 2
 ; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
@@ -396,16 +404,22 @@ define @vsitofp_nxv32f16_nxv32i32( %va,
 ; ZVFH-NEXT: sltu a3, a0, a2
 ; ZVFH-NEXT: addi a3, a3, -1
 ; ZVFH-NEXT: and a2, a3, a2
+; ZVFH-NEXT: addi a3, sp, 16
+; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFH-NEXT: vfncvt.f.x.w v28, v16, v0.t
+; ZVFH-NEXT: vfncvt.f.x.w v20, v24, v0.t
 ; ZVFH-NEXT: bltu a0, a1, .LBB25_2
 ; ZVFH-NEXT: # %bb.1:
 ; ZVFH-NEXT: mv a0, a1
 ; ZVFH-NEXT: .LBB25_2:
+; ZVFH-NEXT: vmv1r.v v0, v7
 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vmv1r.v v0, v24
-; ZVFH-NEXT: vfncvt.f.x.w v24, v8, v0.t
-; ZVFH-NEXT: vmv8r.v v8, v24
+; ZVFH-NEXT: vfncvt.f.x.w v16, v8, v0.t
+; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: addi sp, sp, 16
 ; ZVFH-NEXT: ret
 ;
 ; ZVFHMIN-LABEL: vsitofp_nxv32f16_nxv32i32:
@@ -428,8 +442,8 @@ define @vsitofp_nxv32f16_nxv32i32( %va,
 ; ZVFHMIN-NEXT: # %bb.1:
 ; ZVFHMIN-NEXT: mv a0, a1
 ; ZVFHMIN-NEXT: .LBB25_2:
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
@@ -460,8 +474,8 @@ define @vsitofp_nxv32f32_nxv32i32( %va,
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.sitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
index b56a0f40176cf..613b58b0f1b88 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -593,22 +593,22 @@ define @vssub_vi_nxv128i8( %va, @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl)
 ret %v
@@ -1393,25 +1393,25 @@ define @vssub_vi_nxv32i32( %va, @llvm.vp.ssub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl)
 ret %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
index 8275c3081c7c1..8c729d7d9bfb6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -591,22 +591,22 @@ define @vssubu_vi_nxv128i8( %va, @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl)
 ret %v
@@ -1391,25 +1391,25 @@ define @vssubu_vi_nxv32i32( %va, @llvm.vp.usub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl)
 ret %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
index 4857810e7a170..27755c166cc52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -174,8 +174,8 @@ define @vtrunc_nxv15i16_nxv15i64( %a, @vtrunc_nxv32i7_nxv32i32( %a, @vtrunc_nxv32i8_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @llvm.vp.uitofp.nxv32f16.nxv32i32( @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32:
 ; ZVFH: # %bb.0:
-; ZVFH-NEXT: vmv1r.v v24, v0
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a1, vlenb
+; ZVFH-NEXT: slli a1, a1, 3
+; ZVFH-NEXT: sub sp, sp, a1
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: vmv1r.v v7, v0
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; ZVFH-NEXT: csrr a1, vlenb
 ; ZVFH-NEXT: srli a2, a1, 2
 ; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
@@ -396,16 +404,22 @@ define @vuitofp_nxv32f16_nxv32i32( %va,
 ; ZVFH-NEXT: sltu a3, a0, a2
 ; ZVFH-NEXT: addi a3, a3, -1
 ; ZVFH-NEXT: and a2, a3, a2
+; ZVFH-NEXT: addi a3, sp, 16
+; ZVFH-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFH-NEXT: vfncvt.f.xu.w v28, v16, v0.t
+; ZVFH-NEXT: vfncvt.f.xu.w v20, v24, v0.t
 ; ZVFH-NEXT: bltu a0, a1, .LBB25_2
 ; ZVFH-NEXT: # %bb.1:
 ; ZVFH-NEXT: mv a0, a1
 ; ZVFH-NEXT: .LBB25_2:
+; ZVFH-NEXT: vmv1r.v v0, v7
 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vmv1r.v v0, v24
-; ZVFH-NEXT: vfncvt.f.xu.w v24, v8, v0.t
-; ZVFH-NEXT: vmv8r.v v8, v24
+; ZVFH-NEXT: vfncvt.f.xu.w v16, v8, v0.t
+; ZVFH-NEXT: vmv8r.v v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: addi sp, sp, 16
 ; ZVFH-NEXT: ret
 ;
 ; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32:
@@ -428,8 +442,8 @@ define @vuitofp_nxv32f16_nxv32i32( %va,
 ; ZVFHMIN-NEXT: # %bb.1:
 ; ZVFHMIN-NEXT: mv a0, a1
 ; ZVFHMIN-NEXT: .LBB25_2:
-; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; ZVFHMIN-NEXT: vfcvt.f.xu.v v8, v8, v0.t
 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v8
@@ -460,8 +474,8 @@ define @vuitofp_nxv32f32_nxv32i32( %va,
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.uitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll
index c5f34eee31189..a869b433a4952 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll
@@ -85,9 +85,9 @@ define @test3( %0, %1, @test3( %0, %1, @vzext_nxv32i8_nxv32i32( %a,