diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 7af543f018ccb..d405395dcf9ec 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
 FunctionPass *createRISCVInsertVSETVLIPass();
 void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
+FunctionPass *createRISCVCoalesceVSETVLIPass();
+void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
+
 FunctionPass *createRISCVPostRAExpandPseudoPass();
 void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
 FunctionPass *createRISCVInsertReadWriteCSRPass();
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 15efcf1dd1f7e..ec1a9f4c135cc 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -27,16 +27,19 @@
 #include "RISCV.h"
 #include "RISCVSubtarget.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include <queue>
 using namespace llvm;
 
 #define DEBUG_TYPE "riscv-insert-vsetvli"
 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
+#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
 
 STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
-STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
+STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
 
 static cl::opt<bool> DisableInsertVSETVLPHIOpt(
     "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -190,6 +193,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
   if (UseMO.getReg() == RISCV::NoRegister)
     return true;
 
+  if (UseMO.isUndef())
+    return true;
+  if (UseMO.getReg().isPhysical())
+    return false;
+
   if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
     if (UseMI->isImplicitDef())
       return true;
@@ -780,11 +788,40 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
                              VSETVLIInfo &Info) const;
   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
   void emitVSETVLIs(MachineBasicBlock &MBB);
-  void doLocalPostpass(MachineBasicBlock &MBB);
   void doPRE(MachineBasicBlock &MBB);
   void insertReadVL(MachineBasicBlock &MBB);
 };
 
+class RISCVCoalesceVSETVLI : public MachineFunctionPass {
+public:
+  static char ID;
+  const RISCVSubtarget *ST;
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+  LiveIntervals *LIS;
+
+  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addRequired<SlotIndexes>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveDebugVariables>();
+    AU.addPreserved<LiveStacks>();
+
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
+
+private:
+  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
+};
+
 } // end anonymous namespace
 
 char RISCVInsertVSETVLI::ID = 0;
@@ -792,6 +829,11 @@ char RISCVInsertVSETVLI::ID = 0;
 INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                 false, false)
 
+char RISCVCoalesceVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
+                RISCV_COALESCE_VSETVLI_NAME, false, false)
+
 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
 // VSETIVLI instruction.
 static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1515,12 +1557,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
     auto &AVL = MI.getOperand(1);
     auto &PrevAVL = PrevMI.getOperand(1);
-    assert(MRI.isSSA());
 
     // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
     // For now just check that PrevMI uses the same virtual register.
     if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
-        (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
+        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
+         PrevAVL.getReg() != AVL.getReg()))
       return false;
   }
 
@@ -1530,7 +1572,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }
 
-void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
+bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
   MachineInstr *NextMI = nullptr;
   // We can have arbitrary code in successors, so VL and VTYPE
   // must be considered demanded.
@@ -1563,8 +1605,28 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
 
     if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
       if (!isVLPreservingConfig(*NextMI)) {
-        MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
+        Register DefReg = NextMI->getOperand(0).getReg();
+
+        MI.getOperand(0).setReg(DefReg);
         MI.getOperand(0).setIsDead(false);
+
+        // The def of DefReg moved to MI, so extend the LiveInterval up to
+        // it.
+        if (DefReg.isVirtual()) {
+          LiveInterval &DefLI = LIS->getInterval(DefReg);
+          SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
+          VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
+          LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
+          DefLI.addSegment(S);
+          DefVNI->def = MISlot;
+          // Mark DefLI as spillable if it was previously unspillable
+          DefLI.setWeight(0);
+
+          // DefReg may have had no uses, in which case we need to shrink
+          // the LiveInterval up to MI.
+          LIS->shrinkToUses(&DefLI);
+        }
+
         Register OldVLReg;
         if (MI.getOperand(1).isReg())
           OldVLReg = MI.getOperand(1).getReg();
@@ -1572,11 +1634,21 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
           MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
         else
           MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
-        if (OldVLReg) {
+
+        // Clear NextMI's AVL early so we're not counting it as a use.
+        if (NextMI->getOperand(1).isReg())
+          NextMI->getOperand(1).setReg(RISCV::NoRegister);
+
+        if (OldVLReg && OldVLReg.isVirtual()) {
+          // NextMI no longer uses OldVLReg so shrink its LiveInterval.
+          LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
+
           MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
           if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
-              MRI->use_nodbg_empty(OldVLReg))
+              MRI->use_nodbg_empty(OldVLReg)) {
             VLOpDef->eraseFromParent();
+            LIS->removeInterval(OldVLReg);
+          }
         }
         MI.setDesc(NextMI->getDesc());
       }
@@ -1589,9 +1661,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
     Used = getDemanded(MI, MRI, ST);
   }
 
-  NumRemovedVSETVL += ToDelete.size();
-  for (auto *MI : ToDelete)
+  NumCoalescedVSETVL += ToDelete.size();
+  for (auto *MI : ToDelete) {
+    LIS->RemoveMachineInstrFromMaps(*MI);
     MI->eraseFromParent();
+  }
+
+  return !ToDelete.empty();
 }
 
 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
@@ -1666,15 +1742,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF)
     emitVSETVLIs(MBB);
 
-  // Now that all vsetvlis are explicit, go through and do block local
-  // DSE and peephole based demanded fields based transforms. Note that
-  // this *must* be done outside the main dataflow so long as we allow
-  // any cross block analysis within the dataflow. We can't have both
-  // demanded fields based mutation and non-local analysis in the
-  // dataflow at the same time without introducing inconsistencies.
-  for (MachineBasicBlock &MBB : MF)
-    doLocalPostpass(MBB);
-
   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
   // of VLEFF/VLSEGFF.
   for (MachineBasicBlock &MBB : MF)
@@ -1688,3 +1755,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
   return new RISCVInsertVSETVLI();
 }
+
+// Now that all vsetvlis are explicit, go through and do block local
+// DSE and peephole based demanded fields based transforms. Note that
+// this *must* be done outside the main dataflow so long as we allow
+// any cross block analysis within the dataflow. We can't have both
+// demanded fields based mutation and non-local analysis in the
+// dataflow at the same time without introducing inconsistencies.
+bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  ST = &MF.getSubtarget<RISCVSubtarget>();
+  if (!ST->hasVInstructions())
+    return false;
+  TII = ST->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  LIS = &getAnalysis<LiveIntervals>();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= coalesceVSETVLIs(MBB);
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
+  return new RISCVCoalesceVSETVLI();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 34ddd63523108..0876f46728a10 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -116,6 +116,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVFoldMasksPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVCoalesceVSETVLIPass(*PR);
   initializeRISCVInsertReadWriteCSRPass(*PR);
   initializeRISCVInsertWriteVXRMPass(*PR);
   initializeRISCVDAGToDAGISelPass(*PR);
@@ -388,12 +389,14 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
 
 bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   addPass(createRVVRegAllocPass(false));
+  addPass(createRISCVCoalesceVSETVLIPass());
   return TargetPassConfig::addRegAssignAndRewriteFast();
 }
 
 bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
   addPass(createRVVRegAllocPass(true));
   addPass(createVirtRegRewriter(false));
+  addPass(createRISCVCoalesceVSETVLIPass());
   return TargetPassConfig::addRegAssignAndRewriteOptimized();
 }
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index faf37545e1a11..56bd4bd0c08f0 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -47,6 +47,10 @@
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
 ; CHECK-NEXT: Two-Address instruction pass
 ; CHECK-NEXT: Fast Register Allocator
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Slot index numbering
+; CHECK-NEXT: Live Interval Analysis
+; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
 ; CHECK-NEXT: Fixup Statepoint Caller Saved
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 90472f246918f..4121d11109111 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -143,6 +143,7 @@
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: Greedy Register Allocator
 ; CHECK-NEXT: Virtual Register Rewriter
+; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
 ; CHECK-NEXT: Virtual Register Map
 ; CHECK-NEXT: Live Register Matrix
 ; CHECK-NEXT: Greedy Register Allocator
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 8e214e4054783..9e83efd351953 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
 ; CHECK-NEXT: vfmv.v.f v8, fa4
 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT: vmv.v.i v0, 15
 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vmv.v.i v0, 15
 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; CHECK-NEXT: ret
   %v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double
%e1, double %e2, d ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index 6bfd0ac932672..ed152e64a91ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index 85b849045e8ce..a8e4af2d7368e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa5, fa5, fa4 ; RV32-NEXT: fcvt.w.d a2, fa5, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa5, fa5, fa4 ; RV64-NEXT: fcvt.l.d a2, fa5, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 @@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa5, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa5, rtz -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa5, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa5, rtz -; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 6da83644413bc..40ff8b50d99d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; V128-NEXT: vid.v v8 ; V128-NEXT: vsrl.vi v8, v8, 1 -; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vadd.vi v8, v8, 1 +; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 ; V128-NEXT: ret @@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu ; V512-NEXT: vid.v v8 ; V512-NEXT: vsrl.vi v8, v8, 1 -; V512-NEXT: vmv.v.i v0, 10 ; V512-NEXT: vadd.vi v8, v8, 1 +; V512-NEXT: vmv.v.i v0, 10 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 0e8d9cf030669..58af6ac246d16 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -162,16 +162,16 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, 2 -; RV32-NEXT: li a0, 5 -; RV32-NEXT: vslide1down.vx v20, v16, a0 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vle16.v v21, (a0) +; RV32-NEXT: vle16.v v20, (a0) +; RV32-NEXT: li a0, 5 +; RV32-NEXT: vslide1down.vx v21, v16, a0 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: li a0, 164 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v16, v8, v21 -; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t +; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -210,13 +210,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 +; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v17, (a0) +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: li a0, 113 ; 
RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v20, v16 -; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t +; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -368,9 +368,9 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -387,9 +387,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -422,9 +422,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -441,9 +441,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -464,9 +464,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 98 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -681,9 +681,9 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -695,12 +695,12 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) ; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 
f98cb343a2ab4..99364264de829 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -159,16 +159,17 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 56 +; RV32-NEXT: li a3, 54 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x36, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 54 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 21 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -176,31 +177,30 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: slli a5, a4, 3 +; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v3, a4 +; RV32-NEXT: vmv.s.x v0, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 +; RV32-NEXT: li a5, 37 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 2 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs1r.v v3, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 20 -; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: slli a5, a4, 4 +; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill @@ -209,71 +209,82 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: li a5, 13 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, %hi(.LCPI6_1) -; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1) -; RV32-NEXT: lui a5, 1 -; RV32-NEXT: vle16.v v8, (a4) -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 45 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v24, (a3) +; 
RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI6_1) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_1) +; RV32-NEXT: lui a4, 1 +; RV32-NEXT: addi a4, a4, -64 +; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a5, a1, 2 +; RV32-NEXT: add a1, a5, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, a5, -64 -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v2, a4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v24, v4 +; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 20 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 20 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv4r.v v16, v8 ; RV32-NEXT: vslideup.vi v8, v16, 2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -283,36 +294,45 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a3, %hi(.LCPI6_3) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) -; RV32-NEXT: vle16.v v24, (a1) -; RV32-NEXT: vle16.v v8, (a3) +; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v8, (a1) # 
Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI6_3) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_3) +; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v24 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 +; RV32-NEXT: li a3, 13 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v4 +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload @@ -320,8 +340,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v20, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill @@ -330,171 +350,178 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v12, v24, v8 +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t -; RV32-NEXT: vmv.v.v v4, v12 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a3, %hi(.LCPI6_6) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) ; RV32-NEXT: vle16.v v24, (a1) -; RV32-NEXT: vle16.v v8, (a3) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v2, a1 +; RV32-NEXT: lui a1, %hi(.LCPI6_6) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6) +; RV32-NEXT: li a3, 960 +; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v0, (a1) # 
Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v16, v24 -; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v28, v24, v8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v4, v16, v8 ; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v28, v8, 4, v0.t -; RV32-NEXT: vmv.v.v v4, v28 +; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_8) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a3, %hi(.LCPI6_9) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9) -; RV32-NEXT: vle16.v v28, (a1) -; RV32-NEXT: vle16.v v24, (a3) +; RV32-NEXT: vle16.v v0, (a1) +; RV32-NEXT: lui a1, %hi(.LCPI6_9) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9) +; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: 
vrgatherei16.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: lui a1, %hi(.LCPI6_10) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: lui a1, 15 -; RV32-NEXT: vmv.s.x v6, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v28, v24, 6 -; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v8, 6 +; RV32-NEXT: lui a1, %hi(.LCPI6_10) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) +; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: lui a1, 15 +; RV32-NEXT: vmv.s.x v24, a1 +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v28, v8, v4, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; RV32-NEXT: vmv.v.v v28, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a3, %hi(.LCPI6_12) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) ; RV32-NEXT: vle16.v v0, (a1) -; RV32-NEXT: vle16.v v24, (a3) -; RV32-NEXT: li a1, 1008 -; RV32-NEXT: vmv.s.x v7, a1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI6_12) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12) +; RV32-NEXT: li a3, 1008 +; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: vmv.s.x v25, a3 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v16, v0 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmv1r.v v0, v25 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v28, v8 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: 
vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -503,79 +530,70 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a1, %hi(.LCPI6_14) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a2, %hi(.LCPI6_15) -; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15) ; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v8, (a2) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI6_15) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15) +; RV32-NEXT: vle16.v v28, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 45 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v0, v16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 29 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v24, v8 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v28, (a1) +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 21 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 20 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 54 ; RV32-NEXT: mul 
a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 @@ -586,320 +604,324 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 52 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x34, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 52 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 27 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a2, a2, 5 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 35 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v8, v16, 4 ; RV64-NEXT: li a1, 128 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.s.x v4, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vmv1r.v v28, v0 +; RV64-NEXT: vslidedown.vi v16, v16, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 20 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t +; RV64-NEXT: vmv.v.v v20, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV64-NEXT: li a1, 6 ; RV64-NEXT: vid.v v8 -; RV64-NEXT: vmul.vx v2, v8, a1 +; RV64-NEXT: vmul.vx v6, v8, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v8, v24, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v2 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: li a1, 56 -; RV64-NEXT: vmv.s.x v1, a1 -; RV64-NEXT: vadd.vi v30, v2, -16 +; RV64-NEXT: vmv.s.x v5, a1 +; RV64-NEXT: vadd.vi v16, v6, -16 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 
+; RV64-NEXT: vmv1r.v v0, v5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v30, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vmv.v.v v20, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v4, v16, 5 -; RV64-NEXT: vmv1r.v v0, v28 -; RV64-NEXT: vrgather.vi v4, v24, 3, v0.t -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs2r.v v2, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v16, v2, 1 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vrgather.vi v24, v16, 5 +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v16, v2, -15 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v28, v6, 1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 11 +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v16, v28 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v28, v6, -15 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vmv1r.v v0, v5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 11 +; RV64-NEXT: li a2, 12 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v2, v0.t -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vmv2r.v v26, v6 +; RV64-NEXT: vadd.vi v24, v6, 2 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 11 +; RV64-NEXT: li 
a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v6, v2, 2 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v24, v6 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v0, v24 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: li a1, 24 -; RV64-NEXT: vmv.s.x v7, a1 -; RV64-NEXT: vadd.vi v26, v2, -14 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v24, v26, -14 +; RV64-NEXT: vmv2r.v v6, v26 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t +; RV64-NEXT: vrgatherei16.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12 +; RV64-NEXT: vrgatherei16.vv v20, v24, v12 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 20 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v6, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v24, 4, v0.t +; RV64-NEXT: vrgather.vi v20, v24, 4, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v16, v8 +; RV64-NEXT: vmv.v.v v20, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v2, 3 +; RV64-NEXT: vmv2r.v v10, v6 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs2r.v v6, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v8, v6, 3 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v0, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v2, -13 +; RV64-NEXT: vadd.vi v28, v10, -13 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, 
vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v8, v28, v0.t ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addi a1, a1, 7 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12 -; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v8, v0 +; RV64-NEXT: vrgatherei16.vv v20, v0, v12 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 20 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v24, 5, v0.t +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v16, v8 +; RV64-NEXT: vmv.v.v v20, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 20 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 96 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: li a1, 192 ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v28, v8, 2 +; RV64-NEXT: vrgatherei16.vv v28, v24, v12, v0.t +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 +; RV64-NEXT: li a2, 6 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v4, v16, 2 -; RV64-NEXT: vrgatherei16.vv v4, v24, v8, v0.t -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v16, v2, 4 +; RV64-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v16, v24, 4 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v0, v16 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: li a1, 28 -; RV64-NEXT: vmv.s.x v1, a1 -; RV64-NEXT: vadd.vi v16, v2, -12 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: addi a1, sp, 16 
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v26, v24, -12 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vmv.v.v v28, v8 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 112 ; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vi v8, v16, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v2, 5 +; RV64-NEXT: vadd.vi v12, v24, 5 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v0, v12 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v2, -11 +; RV64-NEXT: vadd.vi v12, v24, -11 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -911,40 +933,43 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: vse64.v v4, (a1) +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 1 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: li a3, 20 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 3 -; RV64-NEXT: 
sub a2, a3, a2 +; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 11 +; RV64-NEXT: li a3, 12 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 52 +; RV64-NEXT: li a1, 56 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index c295fed2c28c1..023d707f07bff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -286,8 +286,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -303,8 +303,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -331,8 +331,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -348,8 +348,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -375,8 +375,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -391,8 +391,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: 
vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 4f7b885d998e5..7fc442c88d101 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -25,10 +25,10 @@ define void @splat_v1i1(ptr %x, i1 %y) { ; CHECK-LABEL: splat_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: andi a1, a1, 1 -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 9fbc22221f99b..539a8403c9352 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12728,8 +12728,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; @@ -12803,8 +12803,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: vmv.v.x v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-NEXT: vslide1down.vx v8, v8, a6 -; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslide1down.vx v8, v8, a7 +; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64V-NEXT: addi sp, s0, -128 ; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload @@ -12854,8 +12854,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12896,8 +12896,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12941,8 +12941,8 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -12986,8 +12986,8 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr 
%base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13031,8 +13031,8 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13074,8 +13074,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13120,8 +13120,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13167,8 +13167,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13217,8 +13217,8 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13264,8 +13264,8 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13320,8 +13320,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; 
RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13367,8 +13367,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index 2a0ec47a3de01..5f456c7824316 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ -221,10 +221,10 @@ define i32 @reduce_sum_16xi32_prefix7(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vslideup.vi v8, v10, 7 -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vslideup.vi v10, v8, 7 +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -248,9 +248,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -670,15 +670,15 @@ define i32 @reduce_smax_16xi32_prefix5(ptr %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vslideup.vi v10, v8, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vslideup.vi v10, v8, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 7 -; CHECK-NEXT: vredmax.vs v8, v8, v8 +; CHECK-NEXT: vslideup.vi v10, v8, 7 +; CHECK-NEXT: vredmax.vs v8, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -715,15 +715,15 @@ define i32 @reduce_smin_16xi32_prefix5(ptr %p) { ; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vslideup.vi v10, v8, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vslideup.vi v10, v8, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 7 -; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vslideup.vi v10, v8, 7 +; CHECK-NEXT: vredmin.vs v8, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ 
-830,9 +830,9 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xf32_prefix2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vfredusum.vs v8, v8, v9 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vfredusum.vs v8, v9, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 @@ -847,15 +847,15 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) { ; CHECK: # %bb.0: ; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vslideup.vi v10, v8, 5 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vslideup.vi v10, v8, 6 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 7 -; CHECK-NEXT: vfredusum.vs v8, v8, v10 +; CHECK-NEXT: vslideup.vi v10, v8, 7 +; CHECK-NEXT: vfredusum.vs v8, v10, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 032d32109933f..ab7da9e0faf2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -49,8 +49,8 @@ define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -174,8 +174,8 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -492,8 +492,8 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index 9a5e86d61c265..922692ed88c9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -107,14 +107,14 @@ define void @vector_interleave_store_nxv16i64_nxv8i64( %a, %a, @vector_interleave_nxv4i64_nxv2i64( ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vand.vi v13, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v13, 0 ; CHECK-NEXT: vsrl.vi v16, v12, 1 +; CHECK-NEXT: vand.vi v12, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -137,9 +137,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; ZVBB-NEXT: srli a0, a0, 
2 ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; ZVBB-NEXT: vid.v v12 -; ZVBB-NEXT: vand.vi v13, v12, 1 -; ZVBB-NEXT: vmsne.vi v0, v13, 0 ; ZVBB-NEXT: vsrl.vi v16, v12, 1 +; ZVBB-NEXT: vand.vi v12, v12, 1 +; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t ; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16 @@ -288,32 +288,44 @@ define @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv16f64_nxv8f64( @vector_interleave_nxv16f64_nxv8f64( @llvm.riscv.vmfeq.mask.nxv1f16( define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16( define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16( define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfeq.mask.nxv8f16( define @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfeq.mask.nxv16f16( define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfeq.mask.nxv1f32( 
define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfeq.mask.nxv2f32( define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfeq.mask.nxv4f32( define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfeq.mask.nxv8f32( define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64( define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64( define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64( define 
@intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index 993b50a1c81ce..a6dad9eaa4f35 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f16( define @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfge.mask.nxv2f16( define @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfge.mask.nxv4f16( define @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfge.mask.nxv8f16( define @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfge.mask.nxv16f16( define @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: 
vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f32( define @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfge.mask.nxv2f32( define @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfge.mask.nxv4f32( define @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfge.mask.nxv8f32( define @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f64( define @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfge.mask.nxv2f64( define @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfge.mask.nxv4f64( define @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index 427f0eb28e7df..f643a4036381c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16( define @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16( define @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16( define @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16( define @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16( define @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v12, v8 
-; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32( define @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32( define @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32( define @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32( define @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64( define @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64( define @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; 
CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64( define @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index e5327632fc04f..6c52364c1fbd5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f16( define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfle.mask.nxv2f16( define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfle.mask.nxv4f16( define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfle.mask.nxv8f16( define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfle.mask.nxv16f16( define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f32( define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfle.mask.nxv2f32( define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfle.mask.nxv4f32( define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfle.mask.nxv8f32( define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f64( define @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfle.mask.nxv2f64( define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfle.mask.nxv4f64( define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 64f257e355cea..37a9c6b081a1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f16( define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmflt.mask.nxv2f16( define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmflt.mask.nxv4f16( define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmflt.mask.nxv8f16( define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmflt.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmflt.mask.nxv16f16( define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f32( define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmflt.mask.nxv2f32( define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmflt.mask.nxv4f32( define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmflt.mask.nxv8f32( define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f64( define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret 
@@ -554,12 +544,11 @@ declare @llvm.riscv.vmflt.mask.nxv2f64( define @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmflt.mask.nxv4f64( define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll index 6f6a2a5e8783c..5defce42091e5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f16( define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfne.mask.nxv2f16( define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfne.mask.nxv4f16( define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfne.mask.nxv8f16( define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v14, 
v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfne.mask.nxv16f16( define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f32( define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfne.mask.nxv2f32( define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfne.mask.nxv4f32( define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfne.mask.nxv8f32( define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f64( define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: 
vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfne.mask.nxv2f64( define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfne.mask.nxv4f64( define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll index da1c751b56630..cc6c1f585bb7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i8( define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i8( define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmseq.mask.nxv4i8( define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmseq.mask.nxv8i8( define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, 
v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmseq.mask.nxv16i8( define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmseq.mask.nxv32i8( define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i16( define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i16( define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmseq.mask.nxv4i16( define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmseq.mask.nxv8i16( define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; 
CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmseq.mask.nxv16i16( define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i32( define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i32( define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmseq.mask.nxv4i32( define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmseq.mask.nxv8i32( define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i64( define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v 
v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmseq.mask.nxv2i64( define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmseq.mask.nxv4i64( define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index 502fb9b24148f..c8f9b60a3f2da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i8( define @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i8( define @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsge.mask.nxv4i8( define @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsge.mask.nxv8i8( define @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; 
CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsge.mask.nxv16i8( define @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsge.mask.nxv32i8( define @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i16( define @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i16( define @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsge.mask.nxv4i16( define @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsge.mask.nxv8i16( define @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: 
vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsge.mask.nxv16i16( define @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i32( define @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i32( define @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsge.mask.nxv4i32( define @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsge.mask.nxv8i32( define @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i64( define @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, 
v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsge.mask.nxv2i64( define @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsge.mask.nxv4i64( define @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll index 9410a99d81423..b6c6d9e90f610 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8( define @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8( define @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8( define @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8( define @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8( define @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv32i8( define @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16( define @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16( define @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16( define @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16( define @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, 
ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16( define @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32( define @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32( define @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32( define @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32( define @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i64( define @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i64( define @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i64( define @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll index b7a676e7f2dd3..dfd7096a65ebb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8( define @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8( define @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8( define @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8( define 
@intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8( define @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8( define @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16( define @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16( define @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16( define @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgt.mask.nxv8i16( define @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16( define @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i32( define @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i32( define @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgt.mask.nxv4i32( define @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32( define @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i64( define @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64( %0, 
%1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgt.mask.nxv2i64( define @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgt.mask.nxv4i64( define @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index 88a632de067a6..8826be03bbebb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8( define @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8( define @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8( define @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ 
-190,10 +187,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8( define @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8( define @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8( define @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16( define @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16( define @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16( define @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16( define 
@intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16( define @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32( define @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32( define @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32( define @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32( define @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmsgtu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i64( define @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i64( define @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i64( define @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll index 2248ba03adfe7..5d5a28edbfe15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i8( define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i8( define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsle.mask.nxv4i8( define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, 
mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsle.mask.nxv8i8( define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsle.mask.nxv16i8( define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsle.mask.nxv32i8( define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i16( define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i16( define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsle.mask.nxv4i16( define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, 
v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsle.mask.nxv8i16( define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsle.mask.nxv16i16( define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i32( define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i32( define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsle.mask.nxv4i32( define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsle.mask.nxv8i32( define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v 
v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i64( define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsle.mask.nxv2i64( define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsle.mask.nxv4i64( define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index 57bae83b25e0e..c58ac2d071831 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8( define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8( define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8( define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, 
v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8( define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8( define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8( define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16( define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16( define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16( define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsleu.vv 
v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16( define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16( define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32( define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32( define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32( define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32( define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: 
vmsleu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i64( define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsleu.mask.nxv2i64( define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsleu.mask.nxv4i64( define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 6783f7feb624c..6c6e580b043d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i8( define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i8( define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmslt.mask.nxv4i8( define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmslt.mask.nxv8i8( define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmslt.mask.nxv16i8( define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmslt.mask.nxv32i8( define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i16( define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i16( define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmslt.mask.nxv4i16( define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; 
CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmslt.mask.nxv8i16( define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmslt.mask.nxv16i16( define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i32( define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i32( define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmslt.mask.nxv4i32( define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmslt.mask.nxv8i32( define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, 
v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i64( define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmslt.mask.nxv2i64( define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmslt.mask.nxv4i64( define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index b082b735a0207..76f3e449ab58f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8( define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8( define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8( define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8( define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8( define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8( define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16( define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16( define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsltu.mask.nxv4i16( define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16( define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16( define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32( define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32( define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsltu.mask.nxv4i32( define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32( define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i64( define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsltu.mask.nxv2i64( define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsltu.mask.nxv4i64( define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index bb4575e5d72cb..161c1bc4314fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i8( define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i8( define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ 
declare @llvm.riscv.vmsne.mask.nxv4i8( define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsne.mask.nxv8i8( define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsne.mask.nxv16i8( define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsne.mask.nxv32i8( define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i16( define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i16( define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsne.mask.nxv4i16( define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsne.mask.nxv8i16( define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsne.mask.nxv16i16( define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i32( define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i32( define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsne.mask.nxv4i32( define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsne.mask.nxv8i32( define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i64( define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsne.mask.nxv2i64( define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsne.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsne.mask.nxv4i64( define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsne.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir index e8620c848f8d3..39f517a100f52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=riscv64 -mattr=v \ -# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s +# RUN: -run-pass=riscv-insert-vsetvli,riscv-coalesce-vsetvli | FileCheck %s --- | source_filename = "vsetvli-insert.ll" @@ -166,7 +166,7 @@ body: | ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8 = COPY 
[[PseudoVADD_VV_M1_]]
 ; CHECK-NEXT: PseudoRET implicit $v8
 %2:gprnox0 = COPY $x11
@@ -208,7 +208,7 @@ body: |
 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
 ; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 %pt, [[COPY1]], $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
 ; CHECK-NEXT: %dead:vr = IMPLICIT_DEF
- ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
 ; CHECK-NEXT: $v8 = COPY %3
 ; CHECK-NEXT: PseudoRET implicit $v8
 %1:gprnox0 = COPY $x11
@@ -282,8 +282,8 @@ body: |
 ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
 ; CHECK-NEXT: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt2, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.y)
 ; CHECK-NEXT: %pt3:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, [[PseudoVLE64_V_M1_]], [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoVSE64_V_M1 [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
 ; CHECK-NEXT: PseudoRET
 %1:gpr = COPY $x11
 %0:gpr = COPY $x10
@@ -328,8 +328,8 @@ body: |
 ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S killed [[PseudoVREDSUM_VS_M1_E8_]], 6 /* e64 */, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], [[PseudoVLE64_V_M1_]], [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S [[PseudoVREDSUM_VS_M1_E8_]], 6 /* e64 */, implicit $vtype
 ; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S]]
 ; CHECK-NEXT: PseudoRET implicit $x10
 %0:gpr = COPY $x10
@@ -418,7 +418,7 @@ body: |
 ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
 ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
 ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
 ; CHECK-NEXT: PseudoRET implicit $v8
 %2:gprnox0 = COPY $x11
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index d0b76e7e4535b..fcd852f1210df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -27,7 +27,7 @@ body: |
 ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3
 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
- ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
 ; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
 ; CHECK-NEXT: $x11 = ADDI $x2, 16
 ; CHECK-NEXT: $x12 = PseudoReadVLENB