diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index f78e5f8147d98..080b660cc48ca 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -445,6 +445,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
 }
 
 bool RISCVPassConfig::addRegAssignAndRewriteFast() {
+  addPass(createRISCVVMV0EliminationPass());
   addPass(createRVVRegAllocPass(false));
   addPass(createRISCVInsertVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
@@ -454,6 +455,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
 }
 
 bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
+  addPass(createRISCVVMV0EliminationPass());
   addPass(createRVVRegAllocPass(true));
   addPass(createVirtRegRewriter(false));
   addPass(createRISCVInsertVSETVLIPass());
@@ -618,8 +620,6 @@ void RISCVPassConfig::addPreRegAlloc() {
 
   if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
     addPass(&MachinePipelinerID);
-
-  addPass(createRISCVVMV0EliminationPass());
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
index 9270a5b98a142..4e76450998400 100644
--- a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
@@ -34,6 +34,9 @@
 #ifndef NDEBUG
 #include "llvm/ADT/PostOrderIterator.h"
 #endif
+#include "llvm/CodeGen/LiveDebugVariables.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 
 using namespace llvm;
@@ -51,15 +54,14 @@ class RISCVVMV0Elimination : public MachineFunctionPass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-  MachineFunctionProperties getRequiredProperties() const override {
-    // TODO: We could move this closer to regalloc, out of SSA, which would
-    // allow scheduling past mask operands. We would need to preserve live
-    // intervals.
-    return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::IsSSA);
+    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
+    AU.addPreserved<LiveIntervalsWrapperPass>();
+    AU.addPreserved<SlotIndexesWrapperPass>();
+    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
+    AU.addPreserved<LiveStacksWrapperLegacy>();
+
+    MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
 
@@ -88,12 +90,14 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
   const TargetInstrInfo *TII = ST->getInstrInfo();
+  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
 
 #ifndef NDEBUG
   // Assert that we won't clobber any existing reads of v0 where we need to
   // insert copies.
-  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
   for (MachineBasicBlock *MBB : RPOT) {
     bool V0Clobbered = false;
@@ -115,7 +119,6 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
 #endif
 
   bool MadeChange = false;
-  SmallVector<MachineInstr *> DeadCopies;
 
   // For any instruction with a vmv0 operand, replace it with a copy to v0.
   for (MachineBasicBlock &MBB : MF) {
@@ -127,23 +130,39 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
       if (isVMV0(MCOI)) {
         MachineOperand &MO = MI.getOperand(OpNo);
         Register Src = MO.getReg();
-        assert(MO.isUse() && MO.getSubReg() == RISCV::NoSubRegister &&
-               Src.isVirtual() && "vmv0 use in unexpected form");
-
-        // Peek through a single copy to match what isel does.
-        if (MachineInstr *SrcMI = MRI.getVRegDef(Src);
-            SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual() &&
-            SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister) {
-          // Delete any dead copys to vmv0 to avoid allocating them.
-          if (MRI.hasOneNonDBGUse(Src))
-            DeadCopies.push_back(SrcMI);
-          Src = SrcMI->getOperand(1).getReg();
+        assert(MO.isUse() && Src.isVirtual() &&
+               "vmv0 use in unexpected form");
+
+        // If undef don't emit a copy, since the IMPLICIT_DEF Src will no
+        // longer exist at this stage.
+        if (MO.isUndef()) {
+          MO.setReg(RISCV::V0);
+          MadeChange = true;
+          break;
         }
 
-        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0)
-            .addReg(Src);
-
+        MachineInstr *Copy = BuildMI(MBB, MI, MI.getDebugLoc(),
+                                     TII->get(RISCV::COPY), RISCV::V0)
+                                 .addReg(Src, 0, MO.getSubReg());
         MO.setReg(RISCV::V0);
+        if (LIS) {
+          LIS->InsertMachineInstrInMaps(*Copy);
+          SlotIndex CopySI = LIS->getInstructionIndex(*Copy).getRegSlot();
+          SlotIndex MISI = LIS->getInstructionIndex(MI).getRegSlot();
+
+          assert(std::distance(TRI->regunits(RISCV::V0).begin(),
+                               TRI->regunits(RISCV::V0).end()) == 1);
+          unsigned Unit = *TRI->regunits(RISCV::V0).begin();
+
+          // Create a new definition of V0 from Copy To MI.
+          if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) {
+            VNInfo *VNI = LR->getNextValue(CopySI, LIS->getVNInfoAllocator());
+            LR->addSegment(LiveInterval::Segment(CopySI, MISI, VNI));
+          }
+
+          // Shrink Src's interval now that MI doesn't use it.
+ LIS->shrinkToUses(&LIS->getInterval(Src)); + } MadeChange = true; break; } @@ -151,9 +170,6 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) { } } - for (MachineInstr *MI : DeadCopies) - MI->eraseFromParent(); - if (!MadeChange) return false; diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 694662eab1681..3e7f59d4c5f5a 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -43,10 +43,10 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup -; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Fast Register Allocator ; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: Fast Register Allocator diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index beef7a574dc4f..c24152a021c61 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -128,7 +128,6 @@ ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup -; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions @@ -141,6 +140,7 @@ ; CHECK-NEXT: Register Coalescer ; CHECK-NEXT: Rename Disconnected Subregister Components ; CHECK-NEXT: Machine Instruction Scheduler +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Debug Variable Analysis ; CHECK-NEXT: Live Stack Slot Analysis diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index df27b096967a2..5ca4dc8e21c44 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -15,20 +15,20 @@ define @vp_bitreverse_nxv1i8( %va, @vp_bitreverse_nxv2i8( %va, @vp_bitreverse_nxv4i8( %va, @vp_bitreverse_nxv8i8( %va, @vp_bitreverse_nxv16i8( %va, @vp_bitreverse_nxv32i8( %va, @vp_bitreverse_nxv64i8( %va, @vp_bitreverse_nxv1i16( %va, @vp_bitreverse_nxv2i16( %va, @vp_bitreverse_nxv4i16( %va, @vp_bitreverse_nxv8i16( %va, @vp_bitreverse_nxv16i16( %va, @vp_bitreverse_nxv32i16( %va, @vp_bitreverse_nxv1i32( %va, @vp_bitreverse_nxv2i32( %va, @vp_bitreverse_nxv4i32( %va, @vp_bitreverse_nxv8i32( %va, @vp_bitreverse_nxv16i32( %va, @vp_bitreverse_nxv1i64( %va, @vp_bitreverse_nxv1i64( %va, @vp_bitreverse_nxv2i64( %va, @vp_bitreverse_nxv2i64( %va, @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv1i9( %va, @vp_bswap_nxv1i32( %va, @vp_bswap_nxv2i32( %va, @vp_bswap_nxv4i32( %va, @vp_bswap_nxv8i32( %va, @vp_bswap_nxv16i32( %va, @vp_bswap_nxv1i64( %va, @vp_bswap_nxv1i64( %va, @vp_bswap_nxv2i64( %va, @vp_bswap_nxv2i64( %va, @vp_bswap_nxv4i64( %va, @vp_bswap_nxv4i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64( %va, 
@vp_bswap_nxv1i48( %va, @vp_bswap_nxv1i48( %va, @vp_ceil_vv_nxv1bf16( %va, @vp_ceil_vv_nxv1bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -76,12 +76,12 @@ define @vp_ceil_vv_nxv2bf16( %va, @vp_ceil_vv_nxv2bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ define @vp_ceil_vv_nxv4bf16( %va, @vp_ceil_vv_nxv4bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -184,12 +184,12 @@ define @vp_ceil_vv_nxv8bf16( %va, @vp_ceil_vv_nxv8bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -238,12 +238,12 @@ define @vp_ceil_vv_nxv16bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -265,11 +265,11 @@ define @vp_ceil_vv_nxv16bf16_unmasked( @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: fsrmi a4, 3 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -307,11 +308,10 @@ define @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v5, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a4 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -326,11 +326,11 
@@ define @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -353,11 +353,12 @@ define @vp_ceil_vv_nxv32bf16_unmasked( @vp_ceil_vv_nxv32bf16_unmasked( @vp_ceil_vv_nxv32bf16_unmasked( @vp_ceil_vv_nxv1f16( %va, @vp_ceil_vv_nxv1f16( %va, @vp_ceil_vv_nxv1f16_unmasked( %va, ; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -471,11 +471,11 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -497,9 +497,9 @@ define @vp_ceil_vv_nxv2f16( %va, @vp_ceil_vv_nxv2f16( %va, @vp_ceil_vv_nxv2f16_unmasked( %va, ; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -557,11 +557,11 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -583,9 +583,9 @@ define @vp_ceil_vv_nxv4f16( %va, @vp_ceil_vv_nxv4f16( %va, @vp_ceil_vv_nxv4f16_unmasked( %va, ; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -643,11 +643,11 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -665,14 +665,14 @@ declare @llvm.vp.ceil.nxv8f16(, @vp_ceil_vv_nxv8f16( 
%va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -689,12 +689,12 @@ define @vp_ceil_vv_nxv8f16( %va, @vp_ceil_vv_nxv8f16_unmasked( %va, ; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -731,11 +731,11 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -753,14 +753,14 @@ declare @llvm.vp.ceil.nxv16f16(, @vp_ceil_vv_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -777,12 +777,12 @@ define @vp_ceil_vv_nxv16f16( %va, @vp_ceil_vv_nxv16f16_unmasked( % ; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -819,11 +819,11 @@ define @vp_ceil_vv_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -841,14 +841,14 @@ declare @llvm.vp.ceil.nxv32f16(, @vp_ceil_vv_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; 
ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -864,6 +864,7 @@ define @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -941,11 +941,12 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 3 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v6, v7 @@ -958,11 +959,10 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v6, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -975,10 +975,10 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1000,9 +1000,9 @@ define @vp_ceil_vv_nxv1f32( %va, @vp_ceil_vv_nxv1f32_unmasked( %v ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1042,9 +1042,9 @@ define @vp_ceil_vv_nxv2f32( %va, @vp_ceil_vv_nxv2f32_unmasked( %v ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1081,13 +1081,13 @@ define @vp_ceil_vv_nxv4f32( %va, @vp_ceil_vv_nxv4f32_unmasked( %v ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1125,13 +1125,13 @@ 
define @vp_ceil_vv_nxv8f32( %va, @vp_ceil_vv_nxv8f32_unmasked( %v ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1169,13 +1169,13 @@ define @vp_ceil_vv_nxv16f32( %va, @vp_ceil_vv_nxv16f32_unmasked( ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1216,9 +1216,9 @@ define @vp_ceil_vv_nxv1f64( %va, @vp_ceil_vv_nxv1f64_unmasked( ; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1254,14 +1254,14 @@ declare @llvm.vp.ceil.nxv2f64(, @vp_ceil_vv_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1281,8 +1281,8 @@ define @vp_ceil_vv_nxv2f64_unmasked( ; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1298,14 +1298,14 @@ declare @llvm.vp.ceil.nxv4f64(, @vp_ceil_vv_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1325,8 +1325,8 @@ define @vp_ceil_vv_nxv4f64_unmasked( ; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1342,14 +1342,14 @@ declare @llvm.vp.ceil.nxv7f64(, @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 
%hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1369,8 +1369,8 @@ define @vp_ceil_vv_nxv7f64_unmasked( ; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1386,14 +1386,14 @@ declare @llvm.vp.ceil.nxv8f64(, @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1413,8 +1413,8 @@ define @vp_ceil_vv_nxv8f64_unmasked( ; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1438,20 +1438,20 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: fsrmi a3, 3 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1462,10 +1462,11 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v6, v7 ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: 
fsrm a0 @@ -1487,12 +1488,12 @@ define @vp_ceil_vv_nxv16f64_unmasked( @vp_ceil_vv_nxv16f64_unmasked( %x) { ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vslidedown.vi v5, v8, 8 ; CHECK-NEXT: vslidedown.vi v4, v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v5, 4 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v4, 4 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v7, 4 +; CHECK-NEXT: vslidedown.vi v7, v6, 4 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v6, 4 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vadd.vv v16, v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: vadd.vv v8, v24, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v5, 4 +; CHECK-NEXT: vslidedown.vi v7, v4, 4 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb @@ -155,9 +139,9 @@ define i32 @test_v256i1(<256 x i1> %x) { ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v24, v0, v24 -; CHECK-NEXT: vadd.vv v8, v16, v8 -; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vv v24, v24, v0 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 @@ -248,62 +232,45 @@ entry: define i32 @test_nxv128i1( %x) { ; CHECK-LABEL: test_nxv128i1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmv1r.v v5, v0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: srli a1, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: vslidedown.vx v6, v8, a1 +; CHECK-NEXT: vslidedown.vx v4, v0, a1 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vadd.vv v24, v24, v8 ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a0 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v5, a0 +; CHECK-NEXT: vslidedown.vx v5, v4, a0 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v6, a0 -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v5, v6, a0 -; CHECK-NEXT: vslidedown.vx v4, v7, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vslidedown.vx v0, v7, a0 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v6, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vadd.vv v16, v16, v24 -; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add sp, sp, a1 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret entry: %a = zext %x to @@ -318,163 +285,97 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vmv1r.v v6, v10 -; CHECK-NEXT: vmv1r.v v7, v9 +; CHECK-NEXT: vmv1r.v v4, v9 ; CHECK-NEXT: vmv1r.v v5, v8 -; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: srli a0, a1, 1 -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: srli a0, a1, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v3, v4, a0 ; CHECK-NEXT: vslidedown.vx v2, v5, a0 -; CHECK-NEXT: vmv.v.v v0, v3 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v3, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v2, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v24, v8, 1, v0 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v4, a1 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v5, a1 -; CHECK-NEXT: vslidedown.vx v5, v7, a1 -; CHECK-NEXT: vslidedown.vx v4, v6, a1 +; CHECK-NEXT: vslidedown.vx v0, v6, a0 ; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a0 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v3 ; 
CHECK-NEXT: vadd.vi v16, v16, 1, v0.t ; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: srli a1, a1, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v5, v7, a0 -; CHECK-NEXT: vslidedown.vx v4, v6, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v4, a1 -; CHECK-NEXT: vslidedown.vx v3, v5, a1 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v3 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vslidedown.vx v4, v4, a1 +; CHECK-NEXT: vslidedown.vx v7, v7, a1 +; CHECK-NEXT: vslidedown.vx v6, v6, a1 +; CHECK-NEXT: vslidedown.vx v5, v5, a1 +; CHECK-NEXT: vmv.v.v v0, v5 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vadd.vv v16, v8, v16 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vadd.vv v24, v16, v8 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v7, a0 +; CHECK-NEXT: vslidedown.vx v7, v5, a0 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v6, a0 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v4, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t +; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t -; CHECK-NEXT: vadd.vv v24, v24, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, 
vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v8, v0 -; CHECK-NEXT: vadd.vv v16, v24, v16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: vmv.s.x v16, zero ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -503,16 +404,15 @@ define i16 @test_narrow_nxv64i1( %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v0, a0 +; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vredsum.vs v8, v16, v8 +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll index f6c26bbba89fe..0b93ba8641adb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll @@ -18,52 +18,43 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lanes.b, <4 x i1> %sel) { ; RV32-LABEL: constant_folding_crash: ; RV32: # %bb.0: # %entry -; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: lw a0, 8(a0) +; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; RV32-NEXT: vrgather.vi v11, v10, 0 +; RV32-NEXT: vmsne.vi v10, v11, 0 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vmsne.vi v0, v11, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 -; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v11, 10 -; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vrgather.vi v10, v9, 0 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: vse32.v v11, (a0), v0.t +; RV32-NEXT: vmv.v.i v8, 10 +; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vse32.v v8, (a0), v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: constant_folding_crash: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: ld a0, 8(a0) +; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RV64-NEXT: vmv.v.i v12, 0 +; RV64-NEXT: vmerge.vim v12, v12, 1, v0 +; RV64-NEXT: vrgather.vi v13, v12, 0 +; RV64-NEXT: vmsne.vi v12, v13, 0 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: vmv.v.x v13, a0 ; RV64-NEXT: vmsne.vi v0, v13, 0 ; RV64-NEXT: vsetvli 
zero, zero, e64, m2, ta, ma ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 -; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, 10 +; RV64-NEXT: vmv.v.i v8, 10 ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vrgather.vi v11, v9, 0 -; RV64-NEXT: vmsne.vi v0, v11, 0 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: vse32.v v10, (a0), v0.t +; RV64-NEXT: vse32.v v8, (a0), v0.t ; RV64-NEXT: ret entry: %sunkaddr = getelementptr i8, ptr %v54, i64 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir index be73d4808937a..fa90b80660579 100644 --- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir +++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir @@ -19,9 +19,9 @@ ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: bgeu a0, a2, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %entry + ; CHECK-NEXT: vsll.vi v9, v8, 5 ; CHECK-NEXT: vmsne.vi v0, v8, 0 - ; CHECK-NEXT: vsll.vi v8, v8, 5 - ; CHECK-NEXT: vmerge.vim v9, v8, -1, v0 + ; CHECK-NEXT: vmerge.vim v9, v9, -1, v0 ; CHECK-NEXT: vse64.v v9, (a1) ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 570ff34b0f23a..479c511dd136c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -1349,7 +1349,6 @@ define @vp_ctlz_zero_undef_nxv1i8( %va, @vp_ctlz_zero_undef_nxv1i8( %va, @vp_ctlz_zero_undef_nxv2i8( %va, @vp_ctlz_zero_undef_nxv2i8( %va, @vp_ctlz_zero_undef_nxv4i8( %va, @vp_ctlz_zero_undef_nxv4i8( %va, @vp_ctlz_zero_undef_nxv8i8( %va, @vp_ctlz_zero_undef_nxv8i8( %va, @vp_ctlz_zero_undef_nxv16i8( %va, @vp_ctlz_zero_undef_nxv16i8( %va, @vp_ctlz_zero_undef_nxv1i16( %va, @vp_ctlz_zero_undef_nxv2i16( %va, @vp_ctlz_zero_undef_nxv4i16( %va, @vp_ctlz_zero_undef_nxv8i16( %va, @vp_ctlz_zero_undef_nxv16i16( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t -; CHECK-NEXT: li a0, 142 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 142 ; CHECK-NEXT: vrsub.vx v8, v16, a0, v0.t ; CHECK-NEXT: ret ; @@ -2027,9 +2027,9 @@ define @vp_ctlz_zero_undef_nxv1i32( %va, @vp_ctlz_zero_undef_nxv2i32( %va, @vp_ctlz_zero_undef_nxv4i32( %va, @vp_ctlz_zero_undef_nxv8i32( %va, @vp_ctlz_zero_undef_nxv16i32( %va, ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t -; CHECK-NEXT: li a0, 158 ; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a0, 158 ; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret @@ -2604,12 +2604,12 @@ define @vp_ctlz_zero_undef_nxv1i9( %va, @vp_ctlo_nxv1i9( %va, @vp_ctlo_nxv1i9_vp_xor( %va, @vp_ctpop_nxv1i64( %va, @vp_ctpop_nxv1i64( %va, @vp_ctpop_nxv2i64( %va, @vp_ctpop_nxv2i64( %va, @vp_ctpop_nxv4i64( %va, @vp_ctpop_nxv4i64( %va, @vp_ctpop_nxv7i64( %va, @vp_ctpop_nxv7i64( %va, @vp_ctpop_nxv8i64( %va, @vp_ctpop_nxv8i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @cttz_nxv1i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; 
CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv1i8: @@ -58,9 +58,9 @@ define @cttz_nxv1i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv1i8: @@ -106,9 +106,9 @@ define @cttz_nxv2i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv2i8: @@ -123,9 +123,9 @@ define @cttz_nxv2i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv2i8: @@ -171,9 +171,9 @@ define @cttz_nxv4i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v9, v10, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv4i8: @@ -188,9 +188,9 @@ define @cttz_nxv4i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v9, v10, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv4i8: @@ -236,9 +236,9 @@ define @cttz_nxv8i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v10, v12, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v10, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv8i8: @@ -253,9 +253,9 @@ define @cttz_nxv8i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v10, v12, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v10, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv8i8: @@ -301,9 +301,9 @@ define @cttz_nxv16i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v12, v16, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-F-NEXT: vnsrl.wi v10, v12, 0 +; CHECK-F-NEXT: vsub.vx v10, v10, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v10, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v10, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv16i8: @@ -318,9 +318,9 @@ define @cttz_nxv16i8( %va) { 
; CHECK-D-NEXT: vnsrl.wi v12, v16, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-D-NEXT: vnsrl.wi v10, v12, 0 +; CHECK-D-NEXT: vsub.vx v10, v10, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v10, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v10, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 71136a6526c44..593c9998ec10b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -15,8 +15,8 @@ define @vp_cttz_nxv1i8( %va, @vp_cttz_nxv2i8( %va, @vp_cttz_nxv4i8( %va, @vp_cttz_nxv8i8( %va, @vp_cttz_nxv16i8( %va, @vp_cttz_nxv32i8( %va, @vp_cttz_nxv64i8( %va, @vp_cttz_nxv1i16( %va, @vp_cttz_nxv2i16( %va, @vp_cttz_nxv4i16( %va, @vp_cttz_nxv8i16( %va, @vp_cttz_nxv16i16( %va, @vp_cttz_nxv32i16( %va, @vp_cttz_nxv1i32( %va, @vp_cttz_nxv2i32( %va, @vp_cttz_nxv4i32( %va, @vp_cttz_nxv8i32( %va, @vp_cttz_nxv16i32( %va, @vp_cttz_nxv1i64( %va, @vp_cttz_nxv1i64( %va, @vp_cttz_nxv1i64( %va, @vp_cttz_nxv2i64( %va, @vp_cttz_nxv2i64( %va, @vp_cttz_nxv2i64( %va, @vp_cttz_nxv4i64( %va, @vp_cttz_nxv4i64( %va, @vp_cttz_nxv7i64( %va, @vp_cttz_nxv7i64( %va, @vp_cttz_nxv8i64( %va, @vp_cttz_nxv8i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_zero_undef_nxv1i8( %va, @vp_cttz_zero_undef_nxv1i8( %va, @vp_cttz_zero_undef_nxv2i8( %va, @vp_cttz_zero_undef_nxv2i8( %va, @vp_cttz_zero_undef_nxv4i8( %va, @vp_cttz_zero_undef_nxv4i8( %va, @vp_cttz_zero_undef_nxv8i8( %va, @vp_cttz_zero_undef_nxv8i8( %va, @vp_cttz_zero_undef_nxv16i8( %va, @vp_cttz_zero_undef_nxv16i8( %va, @vp_cttz_zero_undef_nxv32i8( %va, @vp_cttz_zero_undef_nxv64i8( %va, @vp_cttz_zero_undef_nxv1i16( %va, @vp_cttz_zero_undef_nxv2i16( %va, @vp_cttz_zero_undef_nxv4i16( %va, @vp_cttz_zero_undef_nxv8i16( %va, @vp_cttz_zero_undef_nxv16i16( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t -; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vand.vv v16, v8, v12, v0.t ; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t ; CHECK-NEXT: ret ; @@ -3238,10 +3238,10 @@ define @vp_cttz_zero_undef_nxv32i16( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -3318,9 +3318,9 @@ define @vp_cttz_zero_undef_nxv1i32( %va, @vp_cttz_zero_undef_nxv2i32( %va, @vp_cttz_zero_undef_nxv4i32( %va, @vp_cttz_zero_undef_nxv8i32( %va, @vp_cttz_zero_undef_nxv16i32( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: li a1, 127 ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: li a1, 127 ; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: ret @@ -3820,8 +3820,8 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: vmv1r.v 
v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v24, v8, 0, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vand.vv v8, v8, v24, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t @@ -3939,13 +3939,13 @@ define @vp_zero_undef_cttz_nxv1i9( %va, @ceil_nxv1f64_to_si8( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -522,8 +522,8 @@ define @ceil_nxv1f64_to_si8( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -548,8 +548,8 @@ define @ceil_nxv1f64_to_ui8( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -569,8 +569,8 @@ define @ceil_nxv1f64_to_ui8( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -595,8 +595,8 @@ define @ceil_nxv1f64_to_si16( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -614,8 +614,8 @@ define @ceil_nxv1f64_to_si16( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -638,8 +638,8 @@ define @ceil_nxv1f64_to_ui16( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -657,8 +657,8 @@ define @ceil_nxv1f64_to_ui16( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -775,8 +775,8 @@ define @ceil_nxv4f64_to_si8( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, 
v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -796,8 +796,8 @@ define @ceil_nxv4f64_to_si8( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -822,8 +822,8 @@ define @ceil_nxv4f64_to_ui8( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -843,8 +843,8 @@ define @ceil_nxv4f64_to_ui8( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -869,8 +869,8 @@ define @ceil_nxv4f64_to_si16( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -888,8 +888,8 @@ define @ceil_nxv4f64_to_si16( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -912,8 +912,8 @@ define @ceil_nxv4f64_to_ui16( %x) { ; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -931,8 +931,8 @@ define @ceil_nxv4f64_to_ui16( %x) { ; RV64-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll index 25706bdec55c3..72a166e5df52a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll @@ -271,25 +271,25 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 +; CHECK-RV32-NEXT: viota.m v16, v0 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: li a1, 24 ; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; 
CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vrgather.vv v24, v8, v16, v0.t ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: li a1, 24 ; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: viota.m v8, v7 ; CHECK-RV32-NEXT: vmv1r.v v0, v7 ; CHECK-RV32-NEXT: csrr a0, vlenb @@ -319,12 +319,12 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: addi sp, sp, -16 ; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV64-NEXT: csrr a2, vlenb -; CHECK-RV64-NEXT: slli a2, a2, 5 +; CHECK-RV64-NEXT: li a3, 40 +; CHECK-RV64-NEXT: mul a2, a2, a3 ; CHECK-RV64-NEXT: sub sp, sp, a2 -; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-RV64-NEXT: csrr a2, vlenb -; CHECK-RV64-NEXT: li a3, 24 -; CHECK-RV64-NEXT: mul a2, a2, a3 +; CHECK-RV64-NEXT: slli a2, a2, 5 ; CHECK-RV64-NEXT: add a2, sp, a2 ; CHECK-RV64-NEXT: addi a2, a2, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill @@ -335,10 +335,7 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: vmv.x.s a3, v0 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-RV64-NEXT: vle8.v v16, (a1) -; CHECK-RV64-NEXT: csrr a1, vlenb -; CHECK-RV64-NEXT: slli a1, a1, 3 -; CHECK-RV64-NEXT: add a1, sp, a1 -; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: addi a1, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-RV64-NEXT: vmv.x.s a1, v8 @@ -347,7 +344,8 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma ; CHECK-RV64-NEXT: vle8.v v8, (a0) ; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: slli a4, a4, 4 +; CHECK-RV64-NEXT: li a5, 24 +; CHECK-RV64-NEXT: mul a4, a4, a5 ; CHECK-RV64-NEXT: add a4, sp, a4 ; CHECK-RV64-NEXT: addi a4, a4, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill @@ -359,46 +357,58 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: add a0, a0, a1 ; CHECK-RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma ; CHECK-RV64-NEXT: vle8.v v8, (a0) -; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 4 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v16, v0 +; CHECK-RV64-NEXT: viota.m v8, v0 ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 4 +; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: csrr 
a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 5 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: slli a0, a0, 5 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: viota.m v8, v7 ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: slli a0, a0, 5 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 5 +; CHECK-RV64-NEXT: li a1, 40 +; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add sp, sp, a0 ; CHECK-RV64-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -644,17 +654,18 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV32-NEXT: addi sp, sp, -16 ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: csrr a1, vlenb -; CHECK-RV32-NEXT: slli a1, a1, 5 -; CHECK-RV32-NEXT: sub sp, sp, a1 -; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-RV32-NEXT: csrr a1, vlenb ; CHECK-RV32-NEXT: li a2, 24 ; CHECK-RV32-NEXT: mul a1, a1, a2 +; CHECK-RV32-NEXT: sub sp, sp, a1 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-RV32-NEXT: csrr a1, vlenb +; CHECK-RV32-NEXT: slli a1, a1, 4 ; CHECK-RV32-NEXT: add a1, sp, a1 ; CHECK-RV32-NEXT: addi a1, a1, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: li a1, 64 ; CHECK-RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 +; CHECK-RV32-NEXT: li a1, 64 ; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 8 ; CHECK-RV32-NEXT: li a2, 32 ; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma @@ -662,45 +673,34 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-RV32-NEXT: vcpop.m a4, v0 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsrl.vx v16, v0, a2 +; CHECK-RV32-NEXT: vsrl.vx v8, v0, a2 ; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, 
ta, ma ; CHECK-RV32-NEXT: vcpop.m a2, v7 ; CHECK-RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma ; CHECK-RV32-NEXT: vle16.v v24, (a0) -; CHECK-RV32-NEXT: csrr a5, vlenb -; CHECK-RV32-NEXT: slli a5, a5, 4 -; CHECK-RV32-NEXT: add a5, sp, a5 -; CHECK-RV32-NEXT: addi a5, a5, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.x.s a4, v16 +; CHECK-RV32-NEXT: vmv.x.s a4, v8 ; CHECK-RV32-NEXT: cpop a4, a4 ; CHECK-RV32-NEXT: cpop a3, a3 ; CHECK-RV32-NEXT: add a3, a3, a4 ; CHECK-RV32-NEXT: slli a3, a3, 1 ; CHECK-RV32-NEXT: add a0, a0, a3 ; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-RV32-NEXT: vle16.v v16, (a0) +; CHECK-RV32-NEXT: vle16.v v8, (a0) ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV32-NEXT: viota.m v8, v0 +; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: viota.m v8, v7 ; CHECK-RV32-NEXT: vmv1r.v v0, v7 ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload @@ -714,7 +714,8 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: vmv.v.v v16, v24 ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 5 +; CHECK-RV32-NEXT: li a1, 24 +; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add sp, sp, a0 ; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -741,30 +742,30 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV64-NEXT: vcpop.m a2, v0 ; CHECK-RV64-NEXT: vcpop.m a3, v7 ; CHECK-RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-RV64-NEXT: vle16.v v24, (a0) +; CHECK-RV64-NEXT: vle16.v v16, (a0) ; CHECK-RV64-NEXT: csrr a4, vlenb ; CHECK-RV64-NEXT: slli a4, a4, 4 ; CHECK-RV64-NEXT: add a4, sp, a4 ; CHECK-RV64-NEXT: addi a4, a4, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: slli a2, a2, 1 ; CHECK-RV64-NEXT: add a0, a0, a2 ; CHECK-RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-RV64-NEXT: vle16.v v24, (a0) +; CHECK-RV64-NEXT: vle16.v v16, (a0) ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a2, 24 ; CHECK-RV64-NEXT: mul a0, a0, a2 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 +; 
CHECK-RV64-NEXT: viota.m v16, v0 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: viota.m v16, v7 @@ -779,18 +780,19 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV64-NEXT: vmv.v.v v16, v8 ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb @@ -1015,30 +1017,30 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV32-NEXT: vcpop.m a2, v0 ; CHECK-RV32-NEXT: vcpop.m a3, v7 ; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-RV32-NEXT: vle32.v v24, (a0) +; CHECK-RV32-NEXT: vle32.v v16, (a0) ; CHECK-RV32-NEXT: csrr a4, vlenb ; CHECK-RV32-NEXT: slli a4, a4, 4 ; CHECK-RV32-NEXT: add a4, sp, a4 ; CHECK-RV32-NEXT: addi a4, a4, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: slli a2, a2, 2 ; CHECK-RV32-NEXT: add a0, a0, a2 ; CHECK-RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-RV32-NEXT: vle32.v v24, (a0) +; CHECK-RV32-NEXT: vle32.v v16, (a0) ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: li a2, 24 ; CHECK-RV32-NEXT: mul a0, a0, a2 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 +; CHECK-RV32-NEXT: viota.m v16, v0 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: viota.m v16, v7 @@ -1053,18 +1055,19 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add a0, sp, a0 ; 
CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t +; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV32-NEXT: vmv.v.v v16, v8 ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb @@ -1088,71 +1091,73 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV64-NEXT: add a1, sp, a1 ; CHECK-RV64-NEXT: addi a1, a1, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: li a1, 32 ; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV64-NEXT: vmv8r.v v16, v8 +; CHECK-RV64-NEXT: li a1, 32 ; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 4 ; CHECK-RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-RV64-NEXT: vmv.x.s a2, v0 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-RV64-NEXT: vcpop.m a3, v0 ; CHECK-RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-RV64-NEXT: vle32.v v24, (a0) +; CHECK-RV64-NEXT: vle32.v v8, (a0) ; CHECK-RV64-NEXT: csrr a3, vlenb ; CHECK-RV64-NEXT: li a4, 24 ; CHECK-RV64-NEXT: mul a3, a3, a4 ; CHECK-RV64-NEXT: add a3, sp, a3 ; CHECK-RV64-NEXT: addi a3, a3, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-RV64-NEXT: vcpop.m a3, v7 ; CHECK-RV64-NEXT: cpopw a2, a2 ; CHECK-RV64-NEXT: slli a2, a2, 2 ; CHECK-RV64-NEXT: add a0, a0, a2 ; CHECK-RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-RV64-NEXT: vle32.v v16, (a0) +; CHECK-RV64-NEXT: vle32.v v8, (a0) ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 +; CHECK-RV64-NEXT: viota.m v8, v0 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 -; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v16, v7 +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: viota.m v8, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # 
Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v24, v8, v16, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vmv.v.v v16, v24 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 5 ; CHECK-RV64-NEXT: add sp, sp, a0 @@ -1331,15 +1336,16 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: addi a1, a1, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vcpop.m a1, v0 ; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-RV32-NEXT: vle64.v v24, (a0) +; CHECK-RV32-NEXT: vle64.v v8, (a0) ; CHECK-RV32-NEXT: csrr a1, vlenb ; CHECK-RV32-NEXT: li a2, 24 ; CHECK-RV32-NEXT: mul a1, a1, a2 ; CHECK-RV32-NEXT: add a1, sp, a1 ; CHECK-RV32-NEXT: addi a1, a1, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vmv.x.s a1, v0 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 2 @@ -1350,50 +1356,39 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-RV32-NEXT: vcpop.m a1, v7 ; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-RV32-NEXT: vle64.v v16, (a0) +; CHECK-RV32-NEXT: vle64.v v8, (a0) ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 +; CHECK-RV32-NEXT: viota.m v8, v0 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: li a1, 24 ; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v16, v7 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 -; CHECK-RV32-NEXT: add a0, 
sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: viota.m v8, v7 ; CHECK-RV32-NEXT: vmv1r.v v0, v7 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 3 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t +; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vrgather.vv v24, v16, v8, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vmv.v.v v16, v24 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 5 ; CHECK-RV32-NEXT: add sp, sp, a0 @@ -1416,15 +1411,16 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: addi a1, a1, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vcpop.m a1, v0 ; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-RV64-NEXT: vle64.v v24, (a0) +; CHECK-RV64-NEXT: vle64.v v8, (a0) ; CHECK-RV64-NEXT: csrr a1, vlenb ; CHECK-RV64-NEXT: li a2, 24 ; CHECK-RV64-NEXT: mul a1, a1, a2 ; CHECK-RV64-NEXT: add a1, sp, a1 ; CHECK-RV64-NEXT: addi a1, a1, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vmv.x.s a1, v0 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 2 @@ -1435,50 +1431,39 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-RV64-NEXT: vcpop.m a1, v7 ; CHECK-RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-RV64-NEXT: vle64.v v16, (a0) +; CHECK-RV64-NEXT: vle64.v v8, (a0) ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 +; CHECK-RV64-NEXT: viota.m v8, v0 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 -; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v16, v7 -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 
16 ; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: viota.m v8, v7 ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 3 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v24, v16, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vmv.v.v v16, v24 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 5 ; CHECK-RV64-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 796f8dde58f47..dfb303856505f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -143,17 +143,17 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: vl8r.v v8, (a0) ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: vl8r.v v24, (a0) +; RV32-NEXT: vl8r.v v16, (a0) ; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-NEXT: vmseq.vi v0, v8, 0 -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: vmseq.vi v8, v16, 0 +; RV32-NEXT: vmerge.vim v16, v24, 1, v0 +; RV32-NEXT: vs8r.v v16, (a3) ; RV32-NEXT: add a2, a3, a2 -; RV32-NEXT: vmseq.vi v8, v24, 0 -; RV32-NEXT: vmerge.vim v24, v16, 1, v0 -; RV32-NEXT: vs8r.v v24, (a3) ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v16, 1, v0 +; RV32-NEXT: vmerge.vim v8, v24, 1, v0 ; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 @@ -183,17 +183,17 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: vl8r.v v8, (a0) ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: vl8r.v v24, (a0) +; RV64-NEXT: vl8r.v v16, (a0) ; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vmv.v.i v16, 0 +; RV64-NEXT: vmv.v.i v24, 0 ; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: vmseq.vi v8, v16, 0 +; RV64-NEXT: vmerge.vim v16, v24, 1, v0 +; RV64-NEXT: vs8r.v v16, (a3) ; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: vmseq.vi v8, v24, 0 -; RV64-NEXT: vmerge.vim v24, v16, 1, v0 -; RV64-NEXT: vs8r.v v24, (a3) ; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v16, 1, v0 +; RV64-NEXT: vmerge.vim v8, v24, 1, v0 ; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll index 1626b362fed15..98df0c768d19e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -12,9 +12,9 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; 
CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,9 +35,9 @@ define @ceil_nxv2f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,9 +58,9 @@ define @ceil_nxv4f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,9 +81,9 @@ define @ceil_nxv8f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,9 +104,9 @@ define @ceil_nxv16f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -127,9 +127,9 @@ define @ceil_nxv32f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -150,9 +150,9 @@ define @ceil_nxv1f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -173,9 +173,9 @@ define @ceil_nxv2f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -196,9 +196,9 @@ define @ceil_nxv4f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -219,9 +219,9 @@ define @ceil_nxv8f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: 
vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -242,9 +242,9 @@ define @ceil_nxv16f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -265,9 +265,9 @@ define @ceil_nxv1f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -288,9 +288,9 @@ define @ceil_nxv2f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -311,9 +311,9 @@ define @ceil_nxv4f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -334,9 +334,9 @@ define @ceil_nxv8f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll index 4aca2d694dfbb..ace9ac16cfb95 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -18,11 +18,11 @@ define @ceil_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -41,11 +41,11 @@ define @ceil_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -64,11 
+64,11 @@ define @ceil_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -87,11 +87,11 @@ define @ceil_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -110,11 +110,11 @@ define @ceil_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -133,11 +133,11 @@ define @ceil_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -171,8 +171,8 @@ define @ceil_nxv1f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -185,11 +185,11 @@ define @ceil_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -210,8 +210,8 @@ define @ceil_nxv2f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -224,11 +224,11 @@ define @ceil_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: 
fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -249,8 +249,8 @@ define @ceil_nxv4f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -263,11 +263,11 @@ define @ceil_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -288,8 +288,8 @@ define @ceil_nxv8f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -302,11 +302,11 @@ define @ceil_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -327,8 +327,8 @@ define @ceil_nxv16f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -341,11 +341,11 @@ define @ceil_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -366,8 +366,8 @@ define @ceil_nxv32f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -380,11 +380,11 @@ define @ceil_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; 
ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -419,8 +419,8 @@ define @ceil_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -439,8 +439,8 @@ define @ceil_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -459,8 +459,8 @@ define @ceil_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -479,8 +479,8 @@ define @ceil_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -499,8 +499,8 @@ define @ceil_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -519,8 +519,8 @@ define @ceil_nxv1f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -539,8 +539,8 @@ define @ceil_nxv2f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -559,8 +559,8 @@ define @ceil_nxv4f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -579,8 +579,8 @@ define @ceil_nxv8f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll index d93f15ec44053..71ac63e920a7a 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -12,9 +12,9 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,9 +35,9 @@ define @floor_nxv2f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,9 +58,9 @@ define @floor_nxv4f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,9 +81,9 @@ define @floor_nxv8f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,9 +104,9 @@ define @floor_nxv16f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -127,9 +127,9 @@ define @floor_nxv32f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -150,9 +150,9 @@ define @floor_nxv1f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -173,9 +173,9 @@ define @floor_nxv2f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -196,9 +196,9 @@ define @floor_nxv4f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: 
vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -219,9 +219,9 @@ define @floor_nxv8f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -242,9 +242,9 @@ define @floor_nxv16f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -265,9 +265,9 @@ define @floor_nxv1f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -288,9 +288,9 @@ define @floor_nxv2f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -311,9 +311,9 @@ define @floor_nxv4f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -334,9 +334,9 @@ define @floor_nxv8f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll index 010d7786c8891..162e388c2e4df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -18,11 +18,11 @@ define @floor_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -42,11 +42,11 @@ define @floor_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: 
vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -66,11 +66,11 @@ define @floor_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -90,11 +90,11 @@ define @floor_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -114,11 +114,11 @@ define @floor_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -138,11 +138,11 @@ define @floor_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -177,8 +177,8 @@ define @floor_nxv1f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -191,11 +191,11 @@ define @floor_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -216,8 +216,8 @@ define @floor_nxv2f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -230,11 +230,11 @@ define @floor_nxv2f16( %x) { ; 
ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -255,8 +255,8 @@ define @floor_nxv4f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -269,11 +269,11 @@ define @floor_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -294,8 +294,8 @@ define @floor_nxv8f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -308,11 +308,11 @@ define @floor_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -333,8 +333,8 @@ define @floor_nxv16f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -347,11 +347,11 @@ define @floor_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -372,8 +372,8 @@ define @floor_nxv32f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -386,11 +386,11 @@ define @floor_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli 
a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -425,8 +425,8 @@ define @floor_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -445,8 +445,8 @@ define @floor_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -465,8 +465,8 @@ define @floor_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -485,8 +485,8 @@ define @floor_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -505,8 +505,8 @@ define @floor_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -525,8 +525,8 @@ define @floor_nxv1f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -545,8 +545,8 @@ define @floor_nxv2f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -565,8 +565,8 @@ define @floor_nxv4f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -585,8 +585,8 @@ define @floor_nxv8f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, 
v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index 1752dfd50d0c5..f7aa807dbe3bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -104,11 +104,8 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: addi s0, sp, 1536 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -512 -; CHECK-NEXT: addi a0, sp, 1520 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a0, 512 @@ -137,22 +134,24 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v24, a1 ; CHECK-NEXT: li a1, 501 -; CHECK-NEXT: lui a2, %hi(.LCPI2_1) -; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_1) -; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v0, (a2) -; CHECK-NEXT: li a2, 500 -; CHECK-NEXT: vmv.s.x v24, a3 -; CHECK-NEXT: lui a3, %hi(.LCPI2_0) -; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_0) +; CHECK-NEXT: lui a2, %hi(.LCPI2_0) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_0) ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v16, (a3) +; CHECK-NEXT: vle8.v v24, (a2) +; CHECK-NEXT: li a2, 500 +; CHECK-NEXT: vmv.s.x v0, a3 +; CHECK-NEXT: lui a3, %hi(.LCPI2_1) +; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_1) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v3, (a3) +; CHECK-NEXT: addi a3, sp, 1520 +; CHECK-NEXT: vs1r.v v3, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a2 +; CHECK-NEXT: vslideup.vx v8, v0, a2 ; CHECK-NEXT: addi a1, sp, 1520 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-NEXT: addi sp, s0, -1536 ; CHECK-NEXT: .cfi_def_cfa sp, 1536 ; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 025e7ddaf997a..4ad5848aef6cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -11,20 +11,20 @@ define <2 x i8> @vp_bitreverse_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: li a0, 51 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, 
v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -65,20 +65,20 @@ define <4 x i8> @vp_bitreverse_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: li a0, 51 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -119,20 +119,20 @@ define <8 x i8> @vp_bitreverse_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: li a0, 51 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -173,20 +173,20 @@ define <16 x i8> @vp_bitreverse_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v9, v8, 15, v0.t +; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: li a0, 51 ; CHECK-NEXT: vsll.vi v9, v9, 4, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -227,27 +227,27 @@ define <2 x i16> @vp_bitreverse_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t -; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, -241 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; 
CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -295,27 +295,27 @@ define <4 x i16> @vp_bitreverse_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t -; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, -241 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -363,27 +363,27 @@ define <8 x i16> @vp_bitreverse_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t -; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, -241 ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -431,27 +431,27 @@ define <16 x i16> @vp_bitreverse_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t -; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: addi a0, a0, -241 +; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: addi a0, a0, -241 ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v10, v10, a0, 
v0.t ; CHECK-NEXT: lui a0, 3 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret @@ -500,34 +500,34 @@ define <2 x i32> @vp_bitreverse_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v8, 24, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t ; CHECK-NEXT: lui a0, 61681 ; CHECK-NEXT: addi a0, a0, -241 -; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 209715 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -584,34 +584,34 @@ define <4 x i32> @vp_bitreverse_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t -; CHECK-NEXT: vand.vx v10, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v10, v8, 24, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t ; CHECK-NEXT: lui a0, 61681 ; CHECK-NEXT: addi a0, a0, -241 -; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t -; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 209715 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; 
CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t -; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret @@ -668,34 +668,34 @@ define <8 x i32> @vp_bitreverse_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t ; CHECK-NEXT: vor.vv v10, v10, v12, v0.t -; CHECK-NEXT: vand.vx v12, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v12, v8, 24, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t ; CHECK-NEXT: lui a0, 61681 ; CHECK-NEXT: addi a0, a0, -241 -; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t -; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v12, v8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 209715 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: vsrl.vi v10, v8, 2, v0.t -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t -; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret @@ -752,34 +752,34 @@ define <16 x i32> @vp_bitreverse_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t ; CHECK-NEXT: vor.vv v12, v12, v16, v0.t -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v16, v8, 24, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t ; CHECK-NEXT: lui a0, 61681 ; CHECK-NEXT: addi a0, a0, -241 -; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 24, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: vor.vv v8, v16, v8, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t -; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: lui a0, 209715 ; CHECK-NEXT: addi a0, a0, 819 ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t ; CHECK-NEXT: vsrl.vi v12, v8, 2, v0.t -; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; 
CHECK-NEXT: vor.vv v8, v12, v8, v0.t ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t -; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret @@ -835,67 +835,68 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: lui a2, 1044480 ; RV32-NEXT: li a3, 56 -; RV32-NEXT: lui a5, 16 -; RV32-NEXT: li a2, 40 +; RV32-NEXT: lui a4, 16 +; RV32-NEXT: li a5, 40 ; RV32-NEXT: lui a1, 4080 ; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a6), zero -; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vsrl.vi v9, v8, 24, v0.t +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsll.vx v10, v8, a3, v0.t -; RV32-NEXT: addi a5, a5, -256 -; RV32-NEXT: vand.vx v11, v8, a5, v0.t -; RV32-NEXT: vsll.vx v11, v11, a2, v0.t -; RV32-NEXT: vor.vv v10, v10, v11, v0.t -; RV32-NEXT: vand.vx v11, v8, a1, v0.t -; RV32-NEXT: vsll.vi v11, v11, 24, v0.t -; RV32-NEXT: vand.vv v12, v8, v9, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v11, v11, v12, v0.t -; RV32-NEXT: vor.vv v10, v10, v11, v0.t +; RV32-NEXT: addi a2, a4, -256 ; RV32-NEXT: vsrl.vx v11, v8, a3, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vand.vx v12, v12, a5, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a5, v0.t +; RV32-NEXT: vand.vx v13, v8, a2, v0.t +; RV32-NEXT: vand.vx v12, v12, a2, v0.t ; RV32-NEXT: vor.vv v11, v12, v11, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vand.vx v12, v12, a1, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a4 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v12, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vsll.vx v13, v13, a5, v0.t +; RV32-NEXT: vor.vv v10, v10, v13, v0.t +; RV32-NEXT: vsrl.vi v13, v8, 8, v0.t +; RV32-NEXT: vand.vx v9, v9, a1, v0.t +; RV32-NEXT: vand.vv v13, v13, v12, v0.t +; RV32-NEXT: vor.vv v9, v13, v9, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: lui a4, 349525 +; RV32-NEXT: vand.vv v12, v8, v12, v0.t +; RV32-NEXT: vand.vx v8, v8, a1, v0.t +; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: addi a3, a4, 1365 +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t +; RV32-NEXT: vsll.vi v12, v12, 8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a3 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vor.vv v8, v8, v11, v0.t +; RV32-NEXT: vor.vv v9, v9, v11, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v11, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vor.vv v8, v10, v8, v0.t -; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV32-NEXT: vand.vv v10, v10, v9, v0.t -; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v10, a3 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, 
ma +; RV32-NEXT: vor.vv v8, v8, v9, v0.t +; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v9, v9, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV32-NEXT: vand.vv v9, v9, v12, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vand.vv v9, v9, v11, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV32-NEXT: vand.vv v9, v9, v11, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t +; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v9, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 @@ -905,59 +906,59 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV64-LABEL: vp_bitreverse_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: li a3, 255 -; RV64-NEXT: li a2, 56 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 209715 -; RV64-NEXT: lui a7, 349525 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: addiw a6, a6, 819 -; RV64-NEXT: addiw a7, a7, 1365 -; RV64-NEXT: slli t0, a5, 32 -; RV64-NEXT: add t0, a5, t0 -; RV64-NEXT: slli a5, a6, 32 -; RV64-NEXT: add a6, a6, a5 -; RV64-NEXT: slli a5, a7, 32 -; RV64-NEXT: add a5, a7, a5 -; RV64-NEXT: li a7, 40 +; RV64-NEXT: li a5, 40 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vand.vx v9, v8, a1, v0.t -; RV64-NEXT: slli a3, a3, 24 +; RV64-NEXT: vsrl.vi v9, v8, 24, v0.t +; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t ; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v9, v9, 24, v0.t -; RV64-NEXT: vand.vx v10, v8, a3, v0.t -; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vsll.vx v10, v8, a2, v0.t -; RV64-NEXT: vand.vx v11, v8, a0, v0.t -; RV64-NEXT: vsll.vx v11, v11, a7, v0.t -; RV64-NEXT: vor.vv v10, v10, v11, v0.t +; RV64-NEXT: vsrl.vx v11, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v12, v8, a5, v0.t +; RV64-NEXT: vand.vx v12, v12, a0, v0.t +; RV64-NEXT: vor.vv v11, v12, v11, v0.t +; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: slli a2, a2, 24 +; RV64-NEXT: vand.vx v9, v9, a1, v0.t +; RV64-NEXT: vsll.vi v12, v12, 24, v0.t +; RV64-NEXT: vand.vx v10, v10, a2, v0.t ; RV64-NEXT: vor.vv v9, v10, v9, v0.t -; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t -; RV64-NEXT: vsrl.vx v11, v8, a7, v0.t -; RV64-NEXT: vand.vx v11, v11, a0, v0.t -; RV64-NEXT: vor.vv v10, v11, v10, v0.t -; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t -; RV64-NEXT: vand.vx v11, v11, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vor.vv v8, v8, v11, v0.t +; RV64-NEXT: vand.vx v10, v8, a2, v0.t +; RV64-NEXT: vsll.vi v10, v10, 8, v0.t +; RV64-NEXT: vor.vv v10, v12, v10, v0.t +; RV64-NEXT: vsll.vx v12, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vx v8, v8, a5, v0.t +; RV64-NEXT: vor.vv v8, v12, v8, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a0, 32 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a1, a1, a4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vor.vv v9, v9, v11, v0.t ; RV64-NEXT: vor.vv v8, 
v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t +; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t -; RV64-NEXT: vand.vx v9, v9, t0, v0.t -; RV64-NEXT: vand.vx v8, v8, t0, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vand.vx v9, v9, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t -; RV64-NEXT: vand.vx v9, v9, a6, v0.t -; RV64-NEXT: vand.vx v8, v8, a6, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v9, v9, a1, v0.t ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vand.vx v9, v9, a5, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v9, v9, a2, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v9, v8, v0.t ; RV64-NEXT: ret @@ -1108,67 +1109,68 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: lui a2, 1044480 ; RV32-NEXT: li a3, 56 -; RV32-NEXT: lui a5, 16 -; RV32-NEXT: li a2, 40 +; RV32-NEXT: lui a4, 16 +; RV32-NEXT: li a5, 40 ; RV32-NEXT: lui a1, 4080 ; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vsll.vx v12, v8, a3, v0.t +; RV32-NEXT: addi a2, a4, -256 +; RV32-NEXT: vsrl.vx v10, v8, a3, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a5, v0.t +; RV32-NEXT: vand.vx v18, v8, a2, v0.t +; RV32-NEXT: vand.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v10, v16, v10, v0.t ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v10, (a6), zero -; RV32-NEXT: lui a4, 61681 +; RV32-NEXT: vlse64.v v16, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a3, v0.t -; RV32-NEXT: addi a5, a5, -256 -; RV32-NEXT: vand.vx v14, v8, a5, v0.t -; RV32-NEXT: vsll.vx v14, v14, a2, v0.t -; RV32-NEXT: vor.vv v12, v12, v14, v0.t -; RV32-NEXT: vand.vx v14, v8, a1, v0.t -; RV32-NEXT: vsll.vi v14, v14, 24, v0.t -; RV32-NEXT: vand.vv v16, v8, v10, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vor.vv v12, v12, v14, v0.t -; RV32-NEXT: vsrl.vx v14, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a5, v0.t +; RV32-NEXT: vor.vv v12, v12, v18, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 8, v0.t +; RV32-NEXT: vand.vx v14, v14, a1, v0.t +; RV32-NEXT: vand.vv v18, v18, v16, v0.t +; RV32-NEXT: vor.vv v14, v18, v14, v0.t +; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vand.vx v16, v16, a5, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v10, a4 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: lui a4, 349525 +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vx v8, v8, a1, v0.t +; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: addi a3, a4, 1365 +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t +; RV32-NEXT: vsll.vi 
v16, v16, 8, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vor.vv v8, v8, v14, v0.t +; RV32-NEXT: vor.vv v10, v14, v10, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v14, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: vand.vv v12, v12, v10, v0.t -; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v12, a3 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vor.vv v8, v8, v10, v0.t +; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v10, v10, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t +; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV32-NEXT: vand.vv v10, v10, v16, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vand.vv v10, v10, v14, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV32-NEXT: vand.vv v10, v10, v14, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v10, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 @@ -1178,59 +1180,59 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV64-LABEL: vp_bitreverse_v4i64: ; RV64: # %bb.0: ; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: li a3, 255 -; RV64-NEXT: li a2, 56 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 209715 -; RV64-NEXT: lui a7, 349525 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: addiw a6, a6, 819 -; RV64-NEXT: addiw a7, a7, 1365 -; RV64-NEXT: slli t0, a5, 32 -; RV64-NEXT: add t0, a5, t0 -; RV64-NEXT: slli a5, a6, 32 -; RV64-NEXT: add a6, a6, a5 -; RV64-NEXT: slli a5, a7, 32 -; RV64-NEXT: add a5, a7, a5 -; RV64-NEXT: li a7, 40 +; RV64-NEXT: li a5, 40 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vand.vx v10, v8, a1, v0.t -; RV64-NEXT: slli a3, a3, 24 +; RV64-NEXT: vsrl.vi v12, v8, 24, v0.t +; RV64-NEXT: vsrl.vi v14, v8, 8, v0.t ; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v10, v10, 24, v0.t -; RV64-NEXT: vand.vx v12, v8, a3, v0.t -; RV64-NEXT: vsll.vi v12, v12, 8, v0.t -; RV64-NEXT: vor.vv v10, v10, v12, v0.t -; RV64-NEXT: vsll.vx v12, v8, a2, v0.t -; RV64-NEXT: vand.vx v14, v8, a0, v0.t -; RV64-NEXT: vsll.vx v14, v14, a7, v0.t -; RV64-NEXT: vor.vv v12, v12, v14, v0.t -; RV64-NEXT: vor.vv v10, v12, v10, v0.t -; RV64-NEXT: vsrl.vx v12, v8, a2, v0.t -; RV64-NEXT: vsrl.vx v14, v8, a7, v0.t -; RV64-NEXT: vand.vx v14, v14, a0, v0.t +; RV64-NEXT: vsrl.vx v10, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vor.vv v10, v16, v10, v0.t +; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: slli a2, a2, 24 +; RV64-NEXT: vand.vx v12, v12, a1, v0.t +; RV64-NEXT: vsll.vi v16, v16, 24, v0.t +; RV64-NEXT: vand.vx v14, v14, a2, v0.t ; RV64-NEXT: vor.vv v12, v14, v12, v0.t -; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV64-NEXT: vand.vx v14, v14, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: 
vand.vx v14, v8, a2, v0.t +; RV64-NEXT: vsll.vi v14, v14, 8, v0.t +; RV64-NEXT: vor.vv v14, v16, v14, v0.t +; RV64-NEXT: vsll.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vx v8, v8, a5, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a0, 32 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a1, a1, a4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vor.vv v10, v12, v10, v0.t ; RV64-NEXT: vor.vv v8, v8, v14, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t +; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t -; RV64-NEXT: vand.vx v10, v10, t0, v0.t -; RV64-NEXT: vand.vx v8, v8, t0, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vand.vx v10, v10, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 2, v0.t -; RV64-NEXT: vand.vx v10, v10, a6, v0.t -; RV64-NEXT: vand.vx v8, v8, a6, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v10, v10, a1, v0.t ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vand.vx v10, v10, a5, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v10, v10, a2, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v10, v8, v0.t ; RV64-NEXT: ret @@ -1381,69 +1383,70 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: lui a2, 1044480 ; RV32-NEXT: li a3, 56 -; RV32-NEXT: lui a5, 16 -; RV32-NEXT: li a2, 40 +; RV32-NEXT: lui a4, 16 +; RV32-NEXT: li a5, 40 ; RV32-NEXT: lui a1, 4080 ; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t +; RV32-NEXT: sw a2, 8(sp) ; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vsll.vx v16, v8, a3, v0.t +; RV32-NEXT: addi a2, a4, -256 +; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v28, v8, a2, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v12, v24, v12, v0.t ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a6), zero -; RV32-NEXT: lui a4, 61681 +; RV32-NEXT: vlse64.v v24, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: addi a5, a5, -256 -; RV32-NEXT: vand.vx v20, v8, a5, v0.t -; RV32-NEXT: vsll.vx v20, v20, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vand.vx v20, v8, a1, v0.t -; RV32-NEXT: vsll.vi v20, v20, 24, v0.t -; RV32-NEXT: vand.vv v24, v8, v12, v0.t -; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a3, v0.t +; RV32-NEXT: vsll.vx v28, v28, a5, v0.t +; RV32-NEXT: vor.vv v16, v16, v28, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t +; RV32-NEXT: vand.vx v20, v20, a1, v0.t +; RV32-NEXT: vand.vv v28, v28, v24, v0.t +; RV32-NEXT: vor.vv v20, v28, v20, v0.t +; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; 
RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: lui a4, 349525 +; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vand.vx v8, v8, a1, v0.t +; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: addi a3, a4, 1365 +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v28, a4 +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vor.vv v20, v20, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a3 +; RV32-NEXT: vmv.v.x v12, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vor.vv v20, v8, v20, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vor.vv v16, v16, v20, v0.t ; RV32-NEXT: vsrl.vi v20, v16, 4, v0.t -; RV32-NEXT: vand.vv v20, v20, v28, v0.t -; RV32-NEXT: vand.vv v16, v16, v28, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v20, v20, v24, v0.t ; RV32-NEXT: vsll.vi v16, v16, 4, v0.t ; RV32-NEXT: vor.vv v16, v20, v16, v0.t ; RV32-NEXT: vsrl.vi v20, v16, 2, v0.t -; RV32-NEXT: vand.vv v20, v20, v12, v0.t -; RV32-NEXT: vand.vv v12, v16, v12, v0.t -; RV32-NEXT: vsll.vi v12, v12, 2, v0.t -; RV32-NEXT: vor.vv v12, v20, v12, v0.t +; RV32-NEXT: vand.vv v16, v16, v12, v0.t +; RV32-NEXT: vand.vv v12, v20, v12, v0.t +; RV32-NEXT: vsll.vi v16, v16, 2, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v12, 1, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: vand.vv v8, v12, v8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v12, v12, v8, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vsll.vi v12, v12, 1, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -1451,59 +1454,59 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV64-LABEL: vp_bitreverse_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: li a3, 255 -; RV64-NEXT: li a2, 56 +; RV64-NEXT: li a2, 255 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 209715 -; RV64-NEXT: lui a7, 349525 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: addiw a6, a6, 819 -; RV64-NEXT: addiw a7, a7, 1365 -; RV64-NEXT: slli t0, a5, 32 -; RV64-NEXT: add t0, a5, t0 -; RV64-NEXT: slli a5, a6, 32 -; RV64-NEXT: add a6, a6, a5 -; RV64-NEXT: slli a5, a7, 32 -; RV64-NEXT: add a5, a7, a5 -; RV64-NEXT: li a7, 40 +; RV64-NEXT: li a5, 40 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vand.vx v12, v8, a1, v0.t -; RV64-NEXT: slli a3, a3, 24 -; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v12, v12, 24, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; RV64-NEXT: vor.vv v12, v12, v16, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vand.vx v20, v8, a0, v0.t -; 
RV64-NEXT: vsll.vx v20, v20, a7, v0.t -; RV64-NEXT: vor.vv v16, v16, v20, v0.t -; RV64-NEXT: vor.vv v12, v16, v12, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vsrl.vx v20, v8, a7, v0.t -; RV64-NEXT: vand.vx v20, v20, a0, v0.t -; RV64-NEXT: vor.vv v16, v20, v16, v0.t ; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV64-NEXT: addiw a0, a4, -256 +; RV64-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a0, v0.t +; RV64-NEXT: vor.vv v12, v24, v12, v0.t +; RV64-NEXT: vand.vx v24, v8, a1, v0.t +; RV64-NEXT: slli a2, a2, 24 ; RV64-NEXT: vand.vx v20, v20, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vsll.vi v24, v24, 24, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vor.vv v16, v16, v20, v0.t +; RV64-NEXT: vand.vx v20, v8, a2, v0.t +; RV64-NEXT: vsll.vi v20, v20, 8, v0.t +; RV64-NEXT: vor.vv v20, v24, v20, v0.t +; RV64-NEXT: vsll.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vx v8, v8, a5, v0.t +; RV64-NEXT: vor.vv v8, v24, v8, v0.t +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: lui a1, 209715 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: addiw a0, a0, -241 +; RV64-NEXT: addiw a1, a1, 819 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: slli a3, a0, 32 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a0, a0, a3 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a1, a1, a4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vor.vv v12, v16, v12, v0.t ; RV64-NEXT: vor.vv v8, v8, v20, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t +; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV64-NEXT: vand.vx v12, v12, t0, v0.t -; RV64-NEXT: vand.vx v8, v8, t0, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vand.vx v12, v12, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV64-NEXT: vand.vx v12, v12, a6, v0.t -; RV64-NEXT: vand.vx v8, v8, a6, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v12, v12, a1, v0.t ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vand.vx v12, v12, a5, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v12, v12, a2, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: ret @@ -1655,77 +1658,87 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 
+; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: li a4, 40 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: lui a2, 4080 +; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; 
RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -1734,37 +1747,31 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: addi a3, a3, 1365 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v24, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vsll.vi v8, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v24, 2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -1776,41 +1783,62 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * 
vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: li a2, 255 ; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 ; RV64-NEXT: li a5, 40 +; RV64-NEXT: addiw a4, a4, -256 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 -; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v16, v16, 24, v0.t -; RV64-NEXT: vand.vx v24, v8, a2, v0.t -; RV64-NEXT: vsll.vi v24, v24, 8, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsll.vx v24, v8, a3, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a4, v0.t ; RV64-NEXT: vor.vv v16, v24, v16, v0.t -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV64-NEXT: slli a2, a2, 24 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vsll.vi v24, v16, 24, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vsll.vi v16, v16, 8, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vand.vx v24, v8, a4, v0.t +; RV64-NEXT: vsll.vx v24, v24, a5, v0.t +; RV64-NEXT: vsll.vx v8, v8, a3, v0.t +; RV64-NEXT: vor.vv v24, v8, v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: lui a1, 209715 ; RV64-NEXT: lui a2, 349525 @@ -1823,26 +1851,25 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a1, a1, a4 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vand.vx 
v16, v16, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: li a1, 24 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 @@ -2034,77 +2061,87 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: li a4, 40 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: lui a2, 4080 +; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; 
RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -2113,37 +2150,31 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: addi a3, a3, 1365 ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a2 +; 
RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v24, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vsll.vi v8, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v24, 2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -2155,41 +2186,62 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: li a2, 255 ; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 ; RV64-NEXT: li a5, 40 +; RV64-NEXT: addiw a4, a4, -256 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 -; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v16, v16, 24, v0.t -; RV64-NEXT: vand.vx v24, v8, a2, v0.t -; RV64-NEXT: vsll.vi v24, v24, 8, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsll.vx v24, v8, a3, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a4, v0.t ; RV64-NEXT: vor.vv v16, v24, v16, v0.t -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV64-NEXT: slli a2, a2, 24 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 
8, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vsll.vi v24, v16, 24, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vsll.vi v16, v16, 8, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vand.vx v24, v8, a4, v0.t +; RV64-NEXT: vsll.vx v24, v24, a5, v0.t +; RV64-NEXT: vsll.vx v8, v8, a3, v0.t +; RV64-NEXT: vor.vv v24, v8, v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: lui a1, 209715 ; RV64-NEXT: lui a2, 349525 @@ -2202,26 +2254,25 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a1, a1, a4 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsll.vi v8, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: li a1, 24 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 @@ -2410,61 +2461,71 @@ declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32) define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_bitreverse_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vslidedown.vi v7, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t -; CHECK-NEXT: lui a1, 1 -; CHECK-NEXT: lui a2, 3 +; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: lui a4, 3 ; CHECK-NEXT: addi a3, a0, -64 ; CHECK-NEXT: sltu a0, a0, a3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a3, a0, a3 ; CHECK-NEXT: lui a0, 5 -; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: addi a1, a1, 
-241 -; CHECK-NEXT: addi a2, a2, 819 -; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vor.vv v8, v8, v24, v0.t +; CHECK-NEXT: addi a5, a2, -241 +; CHECK-NEXT: addi a2, a4, 819 +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v24, v8, 4, v0.t -; CHECK-NEXT: vand.vx v24, v24, a1, v0.t -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a5, v0.t +; CHECK-NEXT: vand.vx v24, v24, a5, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vor.vv v16, v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v24, v8, 2, v0.t -; CHECK-NEXT: vand.vx v24, v24, a2, v0.t ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t +; CHECK-NEXT: vand.vx v24, v24, a2, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t ; CHECK-NEXT: vor.vv v8, v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t +; CHECK-NEXT: vand.vx v16, v16, a5, v0.t +; CHECK-NEXT: vand.vx v24, v24, a5, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v24, v8, 1, v0.t -; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vor.vv v8, v24, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t -; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t -; CHECK-NEXT: vor.vv v16, v16, v24, v0.t -; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t -; CHECK-NEXT: vand.vx v24, v24, a1, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t -; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t -; CHECK-NEXT: vor.vv v16, v24, v16, v0.t ; CHECK-NEXT: vsrl.vi v24, v16, 2, v0.t -; CHECK-NEXT: vand.vx v24, v24, a2, v0.t ; CHECK-NEXT: vand.vx v16, v16, a2, v0.t +; CHECK-NEXT: vand.vx v24, v24, a2, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t ; CHECK-NEXT: vor.vv v16, v24, v16, v0.t ; CHECK-NEXT: vsrl.vi v24, v16, 1, v0.t -; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t ; CHECK-NEXT: vor.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 5f382c3fdc834..a32dfc3d2c25a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -116,9 +116,9 @@ define <2 x i32> @vp_bswap_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t ; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t @@ -158,9 +158,9 @@ define <4 x i32> @vp_bswap_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t 
; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t -; CHECK-NEXT: vsrl.vi v10, v8, 24, v0.t ; CHECK-NEXT: vor.vv v9, v9, v10, v0.t ; CHECK-NEXT: vand.vx v10, v8, a0, v0.t ; CHECK-NEXT: vsll.vi v10, v10, 8, v0.t @@ -200,9 +200,9 @@ define <8 x i32> @vp_bswap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t -; CHECK-NEXT: vsrl.vi v12, v8, 24, v0.t ; CHECK-NEXT: vor.vv v10, v10, v12, v0.t ; CHECK-NEXT: vand.vx v12, v8, a0, v0.t ; CHECK-NEXT: vsll.vi v12, v12, 8, v0.t @@ -242,9 +242,9 @@ define <16 x i32> @vp_bswap_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vi v12, v8, 8, v0.t ; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t ; CHECK-NEXT: addi a0, a0, -256 ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 24, v0.t ; CHECK-NEXT: vor.vv v12, v12, v16, v0.t ; CHECK-NEXT: vand.vx v16, v8, a0, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t @@ -289,34 +289,34 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vsrl.vi v9, v8, 24, v0.t ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a2, v0.t +; RV32-NEXT: vsll.vx v10, v8, a2, v0.t ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v10, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v11, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t +; RV32-NEXT: vand.vx v13, v8, a1, v0.t +; RV32-NEXT: vand.vx v12, v12, a1, v0.t +; RV32-NEXT: vor.vv v11, v12, v11, v0.t ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a6), zero +; RV32-NEXT: vlse64.v v12, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v10, v10, a4, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vand.vx v10, v8, a5, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t -; RV32-NEXT: vand.vv v12, v8, v11, v0.t +; RV32-NEXT: vsll.vx v13, v13, a4, v0.t +; RV32-NEXT: vor.vv v10, v10, v13, v0.t +; RV32-NEXT: vsrl.vi v13, v8, 8, v0.t +; RV32-NEXT: vand.vx v9, v9, a5, v0.t +; RV32-NEXT: vand.vv v13, v13, v12, v0.t +; RV32-NEXT: vor.vv v9, v13, v9, v0.t +; RV32-NEXT: vand.vv v12, v8, v12, v0.t +; RV32-NEXT: vand.vx v8, v8, a5, v0.t +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t ; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t -; RV32-NEXT: vand.vx v12, v12, a1, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vand.vx v12, v12, a5, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t +; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vor.vv v9, v9, v11, v0.t +; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -329,29 +329,29 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: lui a4, 16 ; 
RV64-NEXT: li a5, 40 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vand.vx v9, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 +; RV64-NEXT: vsrl.vi v9, v8, 24, v0.t +; RV64-NEXT: vsrl.vi v10, v8, 8, v0.t ; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v9, v9, 24, v0.t +; RV64-NEXT: vsrl.vx v11, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v12, v8, a5, v0.t +; RV64-NEXT: vand.vx v12, v12, a0, v0.t +; RV64-NEXT: vor.vv v11, v12, v11, v0.t +; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: slli a2, a2, 24 +; RV64-NEXT: vand.vx v9, v9, a1, v0.t +; RV64-NEXT: vsll.vi v12, v12, 24, v0.t +; RV64-NEXT: vand.vx v10, v10, a2, v0.t +; RV64-NEXT: vor.vv v9, v10, v9, v0.t ; RV64-NEXT: vand.vx v10, v8, a2, v0.t ; RV64-NEXT: vsll.vi v10, v10, 8, v0.t -; RV64-NEXT: vor.vv v9, v9, v10, v0.t -; RV64-NEXT: vsll.vx v10, v8, a3, v0.t -; RV64-NEXT: vand.vx v11, v8, a0, v0.t -; RV64-NEXT: vsll.vx v11, v11, a5, v0.t -; RV64-NEXT: vor.vv v10, v10, v11, v0.t -; RV64-NEXT: vor.vv v9, v10, v9, v0.t -; RV64-NEXT: vsrl.vx v10, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v11, v8, a5, v0.t -; RV64-NEXT: vand.vx v11, v11, a0, v0.t -; RV64-NEXT: vor.vv v10, v11, v10, v0.t -; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t -; RV64-NEXT: vand.vx v11, v11, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v11, v0.t +; RV64-NEXT: vor.vv v10, v12, v10, v0.t +; RV64-NEXT: vsll.vx v12, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vx v8, v8, a5, v0.t +; RV64-NEXT: vor.vv v8, v12, v8, v0.t ; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v9, v8, v0.t +; RV64-NEXT: vor.vv v9, v9, v11, v0.t +; RV64-NEXT: vor.vv v8, v8, v9, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl) ret <2 x i64> %v @@ -449,34 +449,34 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a2, v0.t +; RV32-NEXT: vsll.vx v12, v8, a2, v0.t ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v12, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v14, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV32-NEXT: vand.vx v18, v8, a1, v0.t +; RV32-NEXT: vand.vx v16, v16, a1, v0.t +; RV32-NEXT: vor.vv v14, v16, v14, v0.t ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a6), zero +; RV32-NEXT: vlse64.v v16, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v12, v12, a4, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a5, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: vand.vv v16, v8, v14, v0.t +; RV32-NEXT: vsll.vx v18, v18, a4, v0.t +; RV32-NEXT: vor.vv v12, v12, v18, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 8, v0.t +; RV32-NEXT: vand.vx v10, v10, a5, v0.t +; RV32-NEXT: vand.vv v18, v18, v16, v0.t +; RV32-NEXT: vor.vv v10, v18, v10, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vand.vx v8, v8, a5, v0.t +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vor.vv v12, 
v16, v12, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vand.vx v16, v16, a5, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vor.vv v8, v12, v8, v0.t +; RV32-NEXT: vor.vv v10, v10, v14, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -489,29 +489,29 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: lui a4, 16 ; RV64-NEXT: li a5, 40 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vand.vx v10, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 +; RV64-NEXT: vsrl.vi v10, v8, 24, v0.t +; RV64-NEXT: vsrl.vi v12, v8, 8, v0.t ; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v10, v10, 24, v0.t +; RV64-NEXT: vsrl.vx v14, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vor.vv v14, v16, v14, v0.t +; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: slli a2, a2, 24 +; RV64-NEXT: vand.vx v10, v10, a1, v0.t +; RV64-NEXT: vsll.vi v16, v16, 24, v0.t +; RV64-NEXT: vand.vx v12, v12, a2, v0.t +; RV64-NEXT: vor.vv v10, v12, v10, v0.t ; RV64-NEXT: vand.vx v12, v8, a2, v0.t ; RV64-NEXT: vsll.vi v12, v12, 8, v0.t -; RV64-NEXT: vor.vv v10, v10, v12, v0.t -; RV64-NEXT: vsll.vx v12, v8, a3, v0.t -; RV64-NEXT: vand.vx v14, v8, a0, v0.t -; RV64-NEXT: vsll.vx v14, v14, a5, v0.t -; RV64-NEXT: vor.vv v12, v12, v14, v0.t -; RV64-NEXT: vor.vv v10, v12, v10, v0.t -; RV64-NEXT: vsrl.vx v12, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v14, v8, a5, v0.t -; RV64-NEXT: vand.vx v14, v14, a0, v0.t -; RV64-NEXT: vor.vv v12, v14, v12, v0.t -; RV64-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV64-NEXT: vand.vx v14, v14, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v14, v0.t +; RV64-NEXT: vor.vv v12, v16, v12, v0.t +; RV64-NEXT: vsll.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vx v8, v8, a5, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v10, v8, v0.t +; RV64-NEXT: vor.vv v10, v10, v14, v0.t +; RV64-NEXT: vor.vv v8, v8, v10, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl) ret <4 x i64> %v @@ -609,34 +609,34 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v20, v8, a2, v0.t ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v20, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vand.vx v28, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v24, a1, v0.t +; RV32-NEXT: vor.vv v12, v24, v12, v0.t ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a6), zero +; RV32-NEXT: vlse64.v v24, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v20, v20, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vand.vx v20, v8, a5, v0.t -; RV32-NEXT: vsll.vi v20, v20, 24, v0.t -; RV32-NEXT: vand.vv v24, v8, v12, v0.t 
+; RV32-NEXT: vsll.vx v28, v28, a4, v0.t +; RV32-NEXT: vor.vv v20, v20, v28, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t +; RV32-NEXT: vand.vx v16, v16, a5, v0.t +; RV32-NEXT: vand.vv v28, v28, v24, v0.t +; RV32-NEXT: vor.vv v16, v28, v16, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vand.vx v8, v8, a5, v0.t +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a5, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v20, v8, v0.t +; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -649,29 +649,29 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: lui a4, 16 ; RV64-NEXT: li a5, 40 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vand.vx v12, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 -; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v12, v12, 24, v0.t -; RV64-NEXT: vand.vx v16, v8, a2, v0.t -; RV64-NEXT: vsll.vi v16, v16, 8, v0.t -; RV64-NEXT: vor.vv v12, v12, v16, v0.t -; RV64-NEXT: vsll.vx v16, v8, a3, v0.t -; RV64-NEXT: vand.vx v20, v8, a0, v0.t -; RV64-NEXT: vsll.vx v20, v20, a5, v0.t -; RV64-NEXT: vor.vv v16, v16, v20, v0.t -; RV64-NEXT: vor.vv v12, v16, v12, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v20, v8, a5, v0.t -; RV64-NEXT: vand.vx v20, v20, a0, v0.t -; RV64-NEXT: vor.vv v16, v20, v16, v0.t ; RV64-NEXT: vsrl.vi v20, v8, 24, v0.t +; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV64-NEXT: addiw a0, a4, -256 +; RV64-NEXT: vsrl.vx v12, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a0, v0.t +; RV64-NEXT: vor.vv v12, v24, v12, v0.t +; RV64-NEXT: vand.vx v24, v8, a1, v0.t +; RV64-NEXT: slli a2, a2, 24 ; RV64-NEXT: vand.vx v20, v20, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vsll.vi v24, v24, 24, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vor.vv v16, v16, v20, v0.t +; RV64-NEXT: vand.vx v20, v8, a2, v0.t +; RV64-NEXT: vsll.vi v20, v20, 8, v0.t +; RV64-NEXT: vor.vv v20, v24, v20, v0.t +; RV64-NEXT: vsll.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vx v8, v8, a5, v0.t +; RV64-NEXT: vor.vv v8, v24, v8, v0.t ; RV64-NEXT: vor.vv v8, v8, v20, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v12, v8, v0.t +; RV64-NEXT: vor.vv v12, v16, v12, v0.t +; RV64-NEXT: vor.vv v8, v8, v12, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl) ret <8 x i64> %v @@ -764,86 +764,90 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 
24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: li a4, 40 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: lui a2, 4080 +; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi 
v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -855,46 +859,54 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: li a2, 255 ; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 ; RV64-NEXT: li a5, 40 +; RV64-NEXT: addiw a4, a4, -256 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 -; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v16, v16, 24, v0.t -; RV64-NEXT: vand.vx v24, v8, a2, v0.t -; RV64-NEXT: vsll.vi v24, v24, 8, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsll.vx v24, v8, a3, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a4, v0.t ; RV64-NEXT: vor.vv v16, v24, v16, v0.t -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV64-NEXT: slli a2, a2, 24 ; 
RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vsll.vi v24, v16, 24, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vsll.vi v16, v16, 8, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vand.vx v24, v8, a4, v0.t +; RV64-NEXT: vsll.vx v24, v24, a5, v0.t +; RV64-NEXT: vsll.vx v8, v8, a3, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 @@ -1029,86 +1041,90 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v16, v16, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: li a4, 40 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: lui a2, 4080 +; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t -; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero 
-; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -1120,46 +1136,54 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV64-NEXT: addi sp, sp, 
-16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: li a2, 255 ; RV64-NEXT: li a3, 56 ; RV64-NEXT: lui a4, 16 ; RV64-NEXT: li a5, 40 +; RV64-NEXT: addiw a4, a4, -256 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a1, v0.t -; RV64-NEXT: slli a2, a2, 24 -; RV64-NEXT: addiw a0, a4, -256 -; RV64-NEXT: vsll.vi v16, v16, 24, v0.t -; RV64-NEXT: vand.vx v24, v8, a2, v0.t -; RV64-NEXT: vsll.vi v24, v24, 8, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsll.vx v24, v8, a3, v0.t -; RV64-NEXT: vand.vx v16, v8, a0, v0.t -; RV64-NEXT: vsll.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a4, v0.t ; RV64-NEXT: vor.vv v16, v24, v16, v0.t -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t +; RV64-NEXT: slli a2, a2, 24 ; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vsll.vi v24, v16, 24, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vsll.vi v16, v16, 8, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vand.vx v24, v8, a4, v0.t +; RV64-NEXT: vsll.vx v24, v24, a5, v0.t +; RV64-NEXT: vsll.vx v8, v8, a3, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index dbbb8362144ca..0b17e2d8a754a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ 
-132,8 +132,8 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vsub.vv v12, v8, v10 ; CHECK-NEXT: vmulhu.vv v9, v12, v9 ; CHECK-NEXT: vadd.vv v9, v9, v10 -; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsrl.vv v9, v9, v11 +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %e0 = udiv i32 %a, 23 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index c6ff39ad10d6b..0b0c4b3f63f10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -17,9 +17,9 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -63,8 +63,8 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -103,9 +103,9 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, 
v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -149,8 +149,8 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -189,9 +189,9 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -235,8 +235,8 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -271,14 +271,14 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: 
vp_ceil_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -323,8 +323,8 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_ceil_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_ceil_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_ceil_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> 
@vp_ceil_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -444,13 +444,13 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-LABEL: vp_ceil_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_ceil_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -488,13 +488,13 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-LABEL: vp_ceil_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -535,9 +535,9 @@ define <2 x double> @vp_ceil_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %e ; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -556,8 +556,8 @@ define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -573,14 +573,14 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; 
CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -600,8 +600,8 @@ define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -617,14 +617,14 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -644,8 +644,8 @@ define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -661,14 +661,14 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -688,8 +688,8 @@ define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -705,14 +705,14 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_ceil_v16f64(<16 x 
double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -732,8 +732,8 @@ define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -749,44 +749,44 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -806,28 +806,25 @@ define <32 x double> 
@vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %ev ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 3 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 133c7d8653ef9..9e7cd3cb06ac2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -916,11 +916,11 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v2i64: @@ -966,8 +966,8 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -1126,11 +1126,11 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v4i64: @@ -1176,8 +1176,8 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v 
= call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) @@ -1314,33 +1314,33 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v16, v8, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vand.vv v12, v8, v12, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v12, v16, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vv v12, v16, v12, v0.t -; RV32-NEXT: vand.vv v16, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t -; RV32-NEXT: vand.vv v8, v12, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v12, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v12, v8, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v12, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v8i64: @@ -1386,8 +1386,8 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) @@ -1546,11 +1546,11 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64: @@ -1596,8 +1596,8 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) @@ -1756,11 +1756,11 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; 
RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v16i64: @@ -1806,8 +1806,8 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -1923,30 +1923,29 @@ declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v6, v0 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB34_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lui a3, 349525 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a3, a3, 1365 @@ -1954,26 +1953,41 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t 
+; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a3 ; RV32-NEXT: csrr a3, vlenb @@ -1981,103 +1995,158 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: 
vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, 
v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v8, v16, v0.t +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -2086,84 +2155,130 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_ctlz_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV64-NEXT: li a1, 32 +; RV64-NEXT: li a6, 32 ; RV64-NEXT: lui a2, 349525 ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: lui a4, 
61681 ; RV64-NEXT: lui a5, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: addiw a6, a4, -241 -; RV64-NEXT: addiw a7, a5, 257 -; RV64-NEXT: slli a5, a2, 32 -; RV64-NEXT: add a5, a2, a5 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a4, a3, a4 -; RV64-NEXT: slli a2, a6, 32 -; RV64-NEXT: add a2, a6, a2 -; RV64-NEXT: slli a3, a7, 32 -; RV64-NEXT: add a3, a7, a3 -; RV64-NEXT: addi a6, a0, -16 -; RV64-NEXT: sltu a0, a0, a6 +; RV64-NEXT: addiw a7, a2, 1365 +; RV64-NEXT: addiw a2, a3, 819 +; RV64-NEXT: addiw a3, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a4, a7, 32 +; RV64-NEXT: add a7, a7, a4 +; RV64-NEXT: slli a4, a2, 32 +; RV64-NEXT: add a4, a2, a4 +; RV64-NEXT: slli a2, a3, 32 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: slli a3, a5, 32 +; RV64-NEXT: add a3, a5, a3 +; RV64-NEXT: addi a5, a0, -16 +; RV64-NEXT: sltu a0, a0, a5 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a6, a0, a6 +; RV64-NEXT: and a5, a0, a5 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV64-NEXT: vand.vx v24, v24, a5, v0.t -; RV64-NEXT: vsub.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a4, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vadd.vv v8, v24, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 2, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v24, v8, a6, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 8, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vnot.v v8, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 16, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vsrl.vx v24, v16, a1, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t +; RV64-NEXT: vsub.vv 
v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v24, v16, a6, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v24, v8, a4, v0.t +; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vnot.v v16, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV64-NEXT: vand.vx v24, v24, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t ; RV64-NEXT: vsub.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v16, a4, v0.t -; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v24, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v16, a4, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmul.vx v24, v24, a3, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vadd.vv v16, v8, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v8, v24, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV64-NEXT: vadd.vv v16, v16, v24, v0.t ; RV64-NEXT: vand.vx v16, v16, a2, v0.t @@ -3301,11 +3416,11 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v2i64: @@ -3351,8 +3466,8 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl) @@ -3509,11 +3624,11 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: 
vp_ctlz_zero_undef_v4i64: @@ -3559,8 +3674,8 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -3695,33 +3810,33 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v16, v8, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vand.vv v12, v8, v12, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v12, v16, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vv v12, v16, v12, v0.t -; RV32-NEXT: vand.vv v16, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t -; RV32-NEXT: vand.vv v8, v12, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v12, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v12, v8, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v12, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v8i64: @@ -3767,8 +3882,8 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -3925,11 +4040,11 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v15i64: @@ -3975,8 +4090,8 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; 
RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl) @@ -4133,11 +4248,11 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v16i64: @@ -4183,8 +4298,8 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl) @@ -4298,30 +4413,29 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v6, v0 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB70_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB70_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB70_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: lui a3, 349525 ; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a3, a3, 1365 @@ -4329,26 +4443,41 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, 
a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v24, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 8, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a3 ; RV32-NEXT: csrr a3, vlenb @@ -4356,103 +4485,158 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; 
RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vor.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v24, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # 
Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v8, v16, v0.t +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -4461,84 +4645,130 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; ; RV64-LABEL: vp_ctlz_zero_undef_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB70_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB70_2: +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV64-NEXT: li a1, 32 +; RV64-NEXT: li a6, 32 ; RV64-NEXT: lui a2, 349525 ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: lui a4, 61681 ; RV64-NEXT: lui a5, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: addiw a6, a4, -241 -; RV64-NEXT: addiw a7, a5, 257 -; RV64-NEXT: slli a5, a2, 32 -; RV64-NEXT: add a5, a2, a5 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a4, a3, a4 -; RV64-NEXT: slli a2, a6, 32 -; RV64-NEXT: add a2, a6, a2 -; RV64-NEXT: slli a3, a7, 32 -; RV64-NEXT: add a3, a7, a3 -; RV64-NEXT: addi a6, a0, -16 -; RV64-NEXT: sltu a0, a0, a6 +; RV64-NEXT: addiw a7, a2, 1365 +; RV64-NEXT: addiw a2, a3, 819 +; RV64-NEXT: addiw a3, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a4, a7, 32 +; RV64-NEXT: add a7, a7, a4 +; RV64-NEXT: slli a4, a2, 32 +; RV64-NEXT: add a4, a2, a4 +; RV64-NEXT: slli a2, a3, 32 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: slli a3, a5, 32 +; RV64-NEXT: add a3, a5, a3 +; RV64-NEXT: addi a5, a0, -16 +; RV64-NEXT: sltu a0, a0, a5 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a6, a0, a6 +; RV64-NEXT: and a5, a0, a5 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 2, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v24, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV64-NEXT: vand.vx v24, v24, a5, v0.t -; RV64-NEXT: vsub.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a4, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vadd.vv v8, v24, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 2, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v24, v8, a6, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 8, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: 
vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vnot.v v8, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 16, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t -; RV64-NEXT: vsrl.vx v24, v16, a1, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t +; RV64-NEXT: vsub.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v24, v16, a6, v0.t ; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v24, v8, a4, v0.t +; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vnot.v v16, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v8, a4, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV64-NEXT: vand.vx v24, v24, a5, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t ; RV64-NEXT: vsub.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v16, a4, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v24, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v16, a4, v0.t ; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v16, a4, v0.t -; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmul.vx v24, v24, a3, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vadd.vv v16, v8, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v8, v24, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV64-NEXT: vadd.vv v16, v16, v24, v0.t ; RV64-NEXT: vand.vx v16, v16, a2, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index baf36425298d1..aaeb91570993a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -686,11 +686,11 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v2i64: @@ -722,8 +722,8 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx 
v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl) @@ -840,11 +840,11 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v4i64: @@ -876,8 +876,8 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl) @@ -974,31 +974,31 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v12, v16, v0.t +; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: vand.vv v16, v8, v12, v0.t +; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v12, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v12, v8, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v12, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v8i64: @@ -1030,8 +1030,8 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl) @@ -1148,11 +1148,11 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: 
vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v15i64: @@ -1184,8 +1184,8 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl) @@ -1302,11 +1302,11 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v16i64: @@ -1338,8 +1338,8 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl) @@ -1430,130 +1430,182 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 25 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x19, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 25 * vlenb +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: vslidedown.vi v16, v0, 2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded 
Spill +; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v6, v24 +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl1r.v v7, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsub.vv v24, v8, v16, v0.t ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded 
Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v8, v16, v0.t +; 
RV32-NEXT: li a2, 56 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 25 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 @@ -1563,62 +1615,84 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; ; RV64-LABEL: vp_ctpop_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV64-NEXT: lui a1, 349525 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui a3, 61681 -; RV64-NEXT: lui a4, 4112 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: addiw a2, a2, 819 -; RV64-NEXT: addiw a3, a3, -241 -; RV64-NEXT: addiw a4, a4, 257 -; RV64-NEXT: slli a5, a1, 32 -; RV64-NEXT: add a1, a1, a5 -; RV64-NEXT: slli a5, a2, 32 -; RV64-NEXT: add a2, a2, a5 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: add a3, a3, a5 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a4, a4, a5 -; RV64-NEXT: addi a5, a0, -16 -; RV64-NEXT: sltu a0, a0, a5 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a6, a2, a6 +; RV64-NEXT: slli a2, a3, 32 +; RV64-NEXT: add a7, a3, a2 +; RV64-NEXT: slli a3, a4, 32 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: slli a2, a5, 32 +; RV64-NEXT: add a2, a5, a2 +; RV64-NEXT: addi a4, a0, -16 +; RV64-NEXT: sltu a0, a0, a4 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a5 -; RV64-NEXT: li a5, 56 -; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: and a4, a0, a4 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vand.vx v24, v24, a6, v0.t ; RV64-NEXT: vsub.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a2, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vadd.vv v8, v24, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vmul.vx v8, v8, a4, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t ; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vand.vx v24, v24, a6, v0.t ; RV64-NEXT: vsub.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v16, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v24, v8, a7, v0.t +; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV64-NEXT: vand.vx v8, v8, a7, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vand.vx v24, v16, a7, v0.t ; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vand.vx v16, v16, a7, v0.t ; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v8, a3, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vmul.vx v16, v16, a4, v0.t -; RV64-NEXT: vsrl.vx v16, v16, a5, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vmul.vx v16, v16, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index 666673a60da5d..2f6a87c40444a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -11,8 +11,8 @@ define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -61,8 +61,8 @@ define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -111,8 +111,8 @@ define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -161,8 +161,8 @@ define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -211,10 +211,10 @@ define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 
3 @@ -275,10 +275,10 @@ define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -339,10 +339,10 @@ define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -403,10 +403,10 @@ define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -467,10 +467,10 @@ define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -533,10 +533,10 @@ define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -599,10 +599,10 @@ define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -665,10 +665,10 @@ define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: lui a0, 
209715 @@ -731,15 +731,15 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vadd.vi v9, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -761,11 +761,11 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v2i64: @@ -800,8 +800,8 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -897,15 +897,15 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vadd.vi v10, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -927,11 +927,11 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v4i64: @@ -966,8 +966,8 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) @@ -1063,41 +1063,41 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vadd.vi v12, v8, -1, v0.t +; 
RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vv v12, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v12, 1, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vv v12, v12, v16, v0.t -; RV32-NEXT: vand.vv v16, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t -; RV32-NEXT: vand.vv v8, v12, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v12, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v12, v8, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v12, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v8i64: @@ -1132,8 +1132,8 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) @@ -1229,41 +1229,41 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v24, 2, 
v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v15i64: @@ -1298,8 +1298,8 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) @@ -1395,41 +1395,41 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v16i64: @@ -1464,8 +1464,8 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, 
v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -1562,136 +1562,174 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 25 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x19, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 25 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a2, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: vslidedown.vi v16, v0, 2 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vi v16, v8, -1, v0.t -; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: vadd.vi v24, v8, -1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v6, v0 +; RV32-NEXT: vsub.vv v24, v8, v24, v0.t +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl1r.v v7, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v8, v16, -1, v0.t +; RV32-NEXT: vnot.v v16, v16, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; 
RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v6 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vi v24, v8, -1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 4 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v8, v16, v0.t +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 25 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 @@ -1701,73 +1739,95 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_cttz_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v24, v8, -1, v0.t -; RV64-NEXT: lui a1, 349525 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui a3, 61681 -; RV64-NEXT: lui a4, 4112 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: addiw a2, a2, 819 -; RV64-NEXT: addiw a5, a3, -241 -; RV64-NEXT: 
addiw a4, a4, 257 -; RV64-NEXT: slli a3, a1, 32 -; RV64-NEXT: add a6, a1, a3 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: add a3, a2, a3 -; RV64-NEXT: slli a1, a5, 32 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: vnot.v v8, v8, v0.t +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a6, a2, a6 +; RV64-NEXT: slli a2, a3, 32 +; RV64-NEXT: add a7, a3, a2 ; RV64-NEXT: slli a2, a4, 32 ; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: slli a3, a5, 32 +; RV64-NEXT: add a3, a5, a3 ; RV64-NEXT: addi a4, a0, -16 ; RV64-NEXT: sltu a0, a0, a4 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a4, a0, a4 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV64-NEXT: vand.vx v24, v24, a6, v0.t -; RV64-NEXT: vsub.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v24, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t -; RV64-NEXT: vmul.vx v8, v8, a2, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: vsub.vv v24, v8, v24, v0.t ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v24, v16, -1, v0.t +; RV64-NEXT: vadd.vi v8, v16, -1, v0.t ; RV64-NEXT: vnot.v v16, v16, v0.t -; RV64-NEXT: vand.vv v16, v16, v24, v0.t -; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV64-NEXT: vand.vx v24, v24, a6, v0.t -; RV64-NEXT: vsub.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v16, a3, v0.t -; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vadd.vv v16, v24, v16, v0.t -; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vand.vv v8, v16, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v16, v24, a7, v0.t +; RV64-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t ; RV64-NEXT: vadd.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vmul.vx v16, v16, a2, v0.t -; RV64-NEXT: vsrl.vx v16, v16, a0, v0.t -; RV64-NEXT: ret - %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) - ret <32 x i64> %v -} - +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a6, v0.t +; RV64-NEXT: vsub.vv v24, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vand.vx v16, v24, a7, v0.t +; RV64-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; 
RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vmul.vx v16, v16, a3, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a0, v0.t +; RV64-NEXT: ret + %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v32i64_unmasked: ; RV32: # %bb.0: @@ -1950,8 +2010,8 @@ define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -1998,8 +2058,8 @@ define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -2046,8 +2106,8 @@ define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -2094,8 +2154,8 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zero ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t +; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t @@ -2142,10 +2202,10 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -2204,10 +2264,10 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -2266,10 +2326,10 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 
1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -2328,10 +2388,10 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 z ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t -; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 3 @@ -2390,10 +2450,10 @@ define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -2454,10 +2514,10 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -2518,10 +2578,10 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t ; CHECK-NEXT: vand.vx v10, v10, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -2582,10 +2642,10 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 z ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t -; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vand.vv v8, v8, v12, v0.t +; CHECK-NEXT: addi a0, a0, 1365 ; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t ; CHECK-NEXT: vand.vx v12, v12, a0, v0.t ; CHECK-NEXT: lui a0, 209715 @@ -2646,15 +2706,15 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vadd.vi v9, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vand.vv v8, v8, v9, v0.t +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: vand.vv v9, v9, v10, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -2676,11 +2736,11 @@ define <2 x i64> 
@vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe ; RV32-NEXT: vadd.vv v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vmul.vv v8, v8, v9, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v2i64: @@ -2715,8 +2775,8 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl) @@ -2810,15 +2870,15 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vadd.vi v10, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vand.vv v8, v8, v10, v0.t +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: vand.vv v10, v10, v12, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -2840,11 +2900,11 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe ; RV32-NEXT: vadd.vv v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vmul.vv v8, v8, v10, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v4i64: @@ -2879,8 +2939,8 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -2974,41 +3034,41 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vadd.vi v12, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vv v12, v8, v12, v0.t -; RV32-NEXT: vsrl.vi v8, v12, 1, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t +; RV32-NEXT: vand.vv v12, v12, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; 
RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vv v12, v12, v16, v0.t -; RV32-NEXT: vand.vv v16, v12, v8, v0.t -; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t -; RV32-NEXT: vand.vv v8, v12, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v12, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v12, v8, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 -; RV32-NEXT: li a1, 56 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v12, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v8i64: @@ -3043,8 +3103,8 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -3138,41 +3198,41 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; 
RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: @@ -3207,8 +3267,8 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl) @@ -3302,41 +3362,41 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32: # %bb.0: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t -; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a1 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: vmul.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64: @@ -3371,8 +3431,8 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: li a0, 56 ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl) @@ -3467,136 +3527,174 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 25 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x19, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 25 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a2, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: vslidedown.vi v16, v0, 2 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB70_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB70_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vi v16, v8, -1, v0.t -; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: vadd.vi v24, v8, -1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v6, v0 +; RV32-NEXT: vsub.vv v24, v8, v24, v0.t +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl1r.v v7, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v8, v16, -1, v0.t +; RV32-NEXT: vnot.v v16, v16, v0.t +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a2, a2, 819 ; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, 
v0.t -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v6 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vi v24, v8, -1, v0.t -; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded 
Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 4 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v16, v8, v16, v0.t +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v24, a2, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 25 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 @@ -3606,67 +3704,89 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; ; RV64-LABEL: vp_cttz_zero_undef_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v6, v0 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB70_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB70_2: +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v24, v8, -1, v0.t -; RV64-NEXT: lui a1, 349525 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui a3, 61681 -; RV64-NEXT: lui a4, 4112 -; RV64-NEXT: addiw a1, a1, 1365 -; RV64-NEXT: addiw a2, a2, 819 -; RV64-NEXT: addiw a5, a3, -241 -; RV64-NEXT: addiw a4, a4, 257 -; RV64-NEXT: slli a3, a1, 32 -; RV64-NEXT: add a6, a1, a3 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: add a3, a2, a3 -; RV64-NEXT: slli a1, a5, 32 -; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: vnot.v v8, v8, v0.t +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a6, a2, a6 +; RV64-NEXT: slli a2, a3, 32 +; RV64-NEXT: add a7, a3, a2 ; RV64-NEXT: slli a2, a4, 32 ; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: slli a3, a5, 
32 +; RV64-NEXT: add a3, a5, a3 ; RV64-NEXT: addi a4, a0, -16 ; RV64-NEXT: sltu a0, a0, a4 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a4, a0, a4 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV64-NEXT: vand.vx v24, v24, a6, v0.t -; RV64-NEXT: vsub.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v24, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v24, v8, v0.t -; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t -; RV64-NEXT: vmul.vx v8, v8, a2, v0.t -; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: vsub.vv v24, v8, v24, v0.t ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v24, v16, -1, v0.t +; RV64-NEXT: vadd.vi v8, v16, -1, v0.t ; RV64-NEXT: vnot.v v16, v16, v0.t -; RV64-NEXT: vand.vv v16, v16, v24, v0.t -; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vand.vv v8, v16, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v16, v24, a7, v0.t +; RV64-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV64-NEXT: vand.vx v24, v24, a6, v0.t -; RV64-NEXT: vsub.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v24, v16, a3, v0.t -; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vsub.vv v24, v8, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV64-NEXT: vadd.vv v8, v16, v8, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vand.vx v16, v24, a7, v0.t +; RV64-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV64-NEXT: vand.vx v24, v24, a7, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v16, v24, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vmul.vx v16, v16, a2, v0.t +; RV64-NEXT: vand.vx v16, v16, a2, v0.t +; RV64-NEXT: vmul.vx v16, v16, a3, v0.t ; RV64-NEXT: vsrl.vx v16, v16, a0, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index d884cece89507..33b0835cfb8e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -44,8 +44,8 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVF-NEXT: vnsrl.wi v12, v8, 23 ; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RVF-NEXT: vnsrl.wi v8, v12, 0 -; RVF-NEXT: vmseq.vi 
v0, v14, 0 ; RVF-NEXT: vsub.vx v8, v8, a1 +; RVF-NEXT: vmseq.vi v0, v14, 0 ; RVF-NEXT: vmerge.vim v8, v8, 8, v0 ; RVF-NEXT: vse8.v v8, (a0) ; RVF-NEXT: ret @@ -63,8 +63,8 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVD-NEXT: vnsrl.wi v12, v8, 23 ; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RVD-NEXT: vnsrl.wi v8, v12, 0 -; RVD-NEXT: vmseq.vi v0, v14, 0 ; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vmseq.vi v0, v14, 0 ; RVD-NEXT: vmerge.vim v8, v8, 8, v0 ; RVD-NEXT: vse8.v v8, (a0) ; RVD-NEXT: ret @@ -419,8 +419,8 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVF-NEXT: vnsrl.wi v16, v8, 23 ; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RVF-NEXT: vnsrl.wi v8, v16, 0 -; RVF-NEXT: vmseq.vi v0, v20, 0 ; RVF-NEXT: vsub.vx v8, v8, a1 +; RVF-NEXT: vmseq.vi v0, v20, 0 ; RVF-NEXT: vmerge.vim v8, v8, 8, v0 ; RVF-NEXT: vse8.v v8, (a0) ; RVF-NEXT: ret @@ -439,8 +439,8 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVD-NEXT: vnsrl.wi v16, v8, 23 ; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RVD-NEXT: vnsrl.wi v8, v16, 0 -; RVD-NEXT: vmseq.vi v0, v20, 0 ; RVD-NEXT: vsub.vx v8, v8, a1 +; RVD-NEXT: vmseq.vi v0, v20, 0 ; RVD-NEXT: vmerge.vim v8, v8, 8, v0 ; RVD-NEXT: vse8.v v8, (a0) ; RVD-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index bb2b57fbcc3b7..54489765cff1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -308,9 +308,9 @@ define void @truncstore_v2i8_v2i1(<2 x i8> %x, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index e53876d69b59b..244e887a97353 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -10,9 +10,9 @@ define i1 @extractelt_v1i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -325,21 +325,21 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: addi s0, sp, 384 ; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: andi a1, a1, 255 +; RV32-NEXT: li a2, 128 +; RV32-NEXT: addi a3, a0, 128 +; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; RV32-NEXT: vle8.v v24, (a3) ; RV32-NEXT: mv a2, sp -; RV32-NEXT: li a3, 128 -; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle8.v v16, (a0) +; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: add a1, a2, a1 -; RV32-NEXT: vmseq.vi v0, v8, 0 +; RV32-NEXT: vmseq.vi v8, v24, 0 ; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vmseq.vi v8, v16, 0 +; RV32-NEXT: vmseq.vi v0, v16, 0 ; RV32-NEXT: vmerge.vim v16, v24, 1, v0 ; RV32-NEXT: vse8.v v16, (a2) ; RV32-NEXT: 
vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v24, 1, v0 -; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -384 @@ -356,21 +356,21 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: addi s0, sp, 384 ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: andi a1, a1, 255 +; RV64-NEXT: li a2, 128 +; RV64-NEXT: addi a3, a0, 128 +; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; RV64-NEXT: vle8.v v24, (a3) ; RV64-NEXT: mv a2, sp -; RV64-NEXT: li a3, 128 -; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle8.v v16, (a0) +; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: add a1, a2, a1 -; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vmseq.vi v8, v24, 0 ; RV64-NEXT: vmv.v.i v24, 0 -; RV64-NEXT: vmseq.vi v8, v16, 0 +; RV64-NEXT: vmseq.vi v0, v16, 0 ; RV64-NEXT: vmerge.vim v16, v24, 1, v0 ; RV64-NEXT: vse8.v v16, (a2) ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v24, 1, v0 -; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -384 @@ -387,21 +387,21 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: addi s0, sp, 384 ; RV32ZBS-NEXT: andi sp, sp, -128 ; RV32ZBS-NEXT: andi a1, a1, 255 +; RV32ZBS-NEXT: li a2, 128 +; RV32ZBS-NEXT: addi a3, a0, 128 +; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; RV32ZBS-NEXT: vle8.v v24, (a3) ; RV32ZBS-NEXT: mv a2, sp -; RV32ZBS-NEXT: li a3, 128 -; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: addi a0, a0, 128 ; RV32ZBS-NEXT: vle8.v v16, (a0) +; RV32ZBS-NEXT: addi a0, sp, 128 ; RV32ZBS-NEXT: add a1, a2, a1 -; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v8, v24, 0 ; RV32ZBS-NEXT: vmv.v.i v24, 0 -; RV32ZBS-NEXT: vmseq.vi v8, v16, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v16, 0 ; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0 ; RV32ZBS-NEXT: vse8.v v16, (a2) ; RV32ZBS-NEXT: vmv1r.v v0, v8 ; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0 -; RV32ZBS-NEXT: addi a0, sp, 128 ; RV32ZBS-NEXT: vse8.v v8, (a0) ; RV32ZBS-NEXT: lbu a0, 0(a1) ; RV32ZBS-NEXT: addi sp, s0, -384 @@ -418,21 +418,21 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: addi s0, sp, 384 ; RV64ZBS-NEXT: andi sp, sp, -128 ; RV64ZBS-NEXT: andi a1, a1, 255 +; RV64ZBS-NEXT: li a2, 128 +; RV64ZBS-NEXT: addi a3, a0, 128 +; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; RV64ZBS-NEXT: vle8.v v24, (a3) ; RV64ZBS-NEXT: mv a2, sp -; RV64ZBS-NEXT: li a3, 128 -; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: addi a0, a0, 128 ; RV64ZBS-NEXT: vle8.v v16, (a0) +; RV64ZBS-NEXT: addi a0, sp, 128 ; RV64ZBS-NEXT: add a1, a2, a1 -; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v8, v24, 0 ; RV64ZBS-NEXT: vmv.v.i v24, 0 -; RV64ZBS-NEXT: vmseq.vi v8, v16, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v16, 0 ; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0 ; RV64ZBS-NEXT: vse8.v v16, (a2) ; RV64ZBS-NEXT: vmv1r.v v0, v8 ; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0 -; RV64ZBS-NEXT: addi a0, sp, 128 ; RV64ZBS-NEXT: vse8.v v8, (a0) ; RV64ZBS-NEXT: lbu a0, 0(a1) ; RV64ZBS-NEXT: addi sp, s0, -384 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll index e2711a0231509..e4dce91b0c788 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -474,12 +474,13 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { ; VLA-NEXT: vlm.v v0, (a0) ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; VLA-NEXT: vslidedown.vi v8, v8, 2 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v9, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -495,12 +496,13 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { ; VLS-NEXT: vlm.v v0, (a0) ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; VLS-NEXT: vslidedown.vi v8, v8, 2 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLS-NEXT: vmsne.vi v0, v8, 0 -; VLS-NEXT: vmv.v.i v8, 0 -; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vmerge.vim v8, v9, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -524,12 +526,13 @@ define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { ; VLA-NEXT: li a0, 42 ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v12, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m4, ta, ma ; VLA-NEXT: vslidedown.vx v8, v8, a0 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v12, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -545,11 +548,12 @@ define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { ; VLS-NEXT: vlm.v v0, (a0) ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 -; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; VLS-NEXT: vslidedown.vi v8, v10, 10 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; VLS-NEXT: vmsne.vi v0, v8, 0 ; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; VLS-NEXT: vslidedown.vi v9, v10, 10 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 @@ -590,12 +594,13 @@ define void @extract_v2i1_nxv2i1_2( %x, ptr %y) { ; VLA-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; VLA-NEXT: vslidedown.vi v8, v8, 2 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v9, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -610,12 +615,13 @@ define void @extract_v2i1_nxv2i1_2( %x, ptr %y) { ; VLS-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; VLS-NEXT: vslidedown.vi v8, v8, 2 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, 
ta, ma ; VLS-NEXT: vmsne.vi v0, v8, 0 -; VLS-NEXT: vmv.v.i v8, 0 -; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vmerge.vim v8, v9, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -654,12 +660,13 @@ define void @extract_v2i1_nxv64i1_2( %x, ptr %y) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -680,12 +687,13 @@ define void @extract_v2i1_nxv64i1_42( %x, ptr %y) { ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: li a1, 42 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v12, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m4, ta, ma ; VLA-NEXT: vslidedown.vx v8, v8, a1 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v12, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -700,11 +708,12 @@ define void @extract_v2i1_nxv64i1_42( %x, ptr %y) { ; VLS-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 -; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; VLS-NEXT: vslidedown.vi v8, v10, 10 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; VLS-NEXT: vmsne.vi v0, v8, 0 ; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; VLS-NEXT: vslidedown.vi v9, v10, 10 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 @@ -725,12 +734,13 @@ define void @extract_v2i1_nxv32i1_26( %x, ptr %y) { ; VLA-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v10, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m2, ta, ma ; VLA-NEXT: vslidedown.vi v8, v8, 26 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v10, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -745,11 +755,12 @@ define void @extract_v2i1_nxv32i1_26( %x, ptr %y) { ; VLS-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 -; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; VLS-NEXT: vslidedown.vi v8, v9, 10 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; VLS-NEXT: vmsne.vi v0, v8, 0 ; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; VLS-NEXT: vslidedown.vi v9, v9, 10 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll index ab2d00b9b9137..5f66197c7aef8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -12,9 +12,9 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,9 +35,9 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,9 +58,9 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,9 +81,9 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,9 +104,9 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -128,9 +128,9 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -151,9 +151,9 @@ define <1 x float> @ceil_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -174,9 +174,9 @@ define <2 x float> @ceil_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, 
ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -197,9 +197,9 @@ define <4 x float> @ceil_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -220,9 +220,9 @@ define <8 x float> @ceil_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -243,9 +243,9 @@ define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -266,9 +266,9 @@ define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -289,9 +289,9 @@ define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -312,9 +312,9 @@ define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -335,9 +335,9 @@ define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll index c6ce7c1bbe8b4..8827faa895dd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -12,9 +12,9 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t 
+; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,9 +35,9 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,9 +58,9 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,9 +81,9 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,9 +104,9 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -128,9 +128,9 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -151,9 +151,9 @@ define <1 x float> @floor_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -174,9 +174,9 @@ define <2 x float> @floor_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -197,9 +197,9 @@ define <4 x float> @floor_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: fsrm a0 @@ -220,9 +220,9 @@ define <8 x float> @floor_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -243,9 +243,9 @@ define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -266,9 +266,9 @@ define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -289,9 +289,9 @@ define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -312,9 +312,9 @@ define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -335,9 +335,9 @@ define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 6fc0165d7e77f..cd3cb2da8abcd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -17,9 +17,9 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x 
fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -63,8 +63,8 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -103,9 +103,9 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -149,8 +149,8 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -189,9 +189,9 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, 
<8 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -235,8 +235,8 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -271,14 +271,14 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -323,8 +323,8 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; 
ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_floor_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_floor_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_floor_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_floor_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -444,13 +444,13 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-LABEL: vp_floor_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_floor_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: 
vfcvt.f.x.v v10, v10, v0.t @@ -488,13 +488,13 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-LABEL: vp_floor_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -535,9 +535,9 @@ define <2 x double> @vp_floor_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext % ; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -556,8 +556,8 @@ define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -573,14 +573,14 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -600,8 +600,8 @@ define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -617,14 +617,14 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v 
v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -644,8 +644,8 @@ define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -661,14 +661,14 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -688,8 +688,8 @@ define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %e ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -705,14 +705,14 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -732,8 +732,8 @@ define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %e ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -749,44 +749,44 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: fsrmi a2, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -806,28 +806,25 @@ define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 2 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v 
v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index eb3209fa7a2a4..851ec44b1625e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -59,16 +59,14 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -127,16 +125,14 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -197,15 +193,13 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -269,15 +263,13 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, 
m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -566,9 +558,8 @@ define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -584,25 +575,25 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -613,29 +604,15 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vmfeq.vv v5, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vv v4, v24, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; 
CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 @@ -643,36 +620,29 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v6, v24, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v5, v8, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v24, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vfmax.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -737,9 +707,8 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index 04e73ac1ea956..12c3f5582e2e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -24,16 +24,14 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v2f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -57,16 +55,14 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v4f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -90,15 +86,13 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v8f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -123,15 +117,13 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v16f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -259,9 +251,8 @@ define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwi ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; 
CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 390e865ea1f9b..6747dcebef38e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -59,16 +59,14 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -127,16 +125,14 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -197,15 +193,13 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -269,15 +263,13 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: ; ZVFHMIN: 
# %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -566,9 +558,8 @@ define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -584,25 +575,25 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -613,29 +604,15 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vmfeq.vv v5, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vv v4, v24, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add 
a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 @@ -643,36 +620,29 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v6, v24, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v5, v8, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v24, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vfmin.vv v16, v24, v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -737,9 +707,8 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index a0334a9a5d20a..9492c9d288af5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -24,16 +24,14 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v2f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: 
vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -57,16 +55,14 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v4f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -90,15 +86,13 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v8f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -123,15 +117,13 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v16f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -259,9 +251,8 @@ define <16 x double> @vfmin_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwi ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; 
CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 3a7ded1537ef6..1516340a24415 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -9,10 +9,10 @@ declare <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half>, me define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -33,10 +33,10 @@ declare <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half>, me define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -57,10 +57,10 @@ declare <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half>, me define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -81,10 +81,10 @@ declare <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half>, define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -107,9 +107,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: lui a1, %hi(.LCPI4_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -226,10 +226,10 @@ declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, 
v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -250,10 +250,10 @@ declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -274,10 +274,10 @@ declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index eb40c133514fe..343eee4ca7338 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -39,10 +39,11 @@ define void @buildvec_no_vid_v4f32(ptr %x) { define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize { ; CHECK-LABEL: hang_when_merging_stores_after_legalization: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v12, v10, 4 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v12, v10, 4 ; CHECK-NEXT: vslideup.vi v12, v10, 2, v0.t ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 @@ -1750,13 +1751,13 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float ; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: vfmv.v.f v9, fa4 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa6 ; CHECK-NEXT: vfslide1down.vf v10, v8, fa3 ; CHECK-NEXT: vfslide1down.vf v8, v9, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v10, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x float> poison, float %e0, i64 0 @@ -1801,13 +1802,13 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d ; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: vfmv.v.f v9, fa4 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa6 ; CHECK-NEXT: vfslide1down.vf v10, v8, fa3 ; CHECK-NEXT: vfslide1down.vf v8, v9, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v10, 4, 
v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll index abb929eaaf6e6..ec9883220327c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -692,12 +692,13 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) { define void @fcmp_une_vf_v4f32(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_vf_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -717,12 +718,13 @@ define void @fcmp_une_vf_v4f32(ptr %x, float %y, ptr %z) { define void @fcmp_une_vf_v4f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_vf_v4f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -742,12 +744,13 @@ define void @fcmp_une_vf_v4f32_nonans(ptr %x, float %y, ptr %z) { define void @fcmp_ogt_vf_v2f64(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_vf_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfgt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -767,12 +770,13 @@ define void @fcmp_ogt_vf_v2f64(ptr %x, double %y, ptr %z) { define void @fcmp_ogt_vf_v2f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_vf_v2f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfgt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -1333,12 +1337,13 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) { define void @fcmp_une_fv_v4f32(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_fv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: 
vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -1358,12 +1363,13 @@ define void @fcmp_une_fv_v4f32(ptr %x, float %y, ptr %z) { define void @fcmp_une_fv_v4f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_fv_v4f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -1383,12 +1389,13 @@ define void @fcmp_une_fv_v4f32_nonans(ptr %x, float %y, ptr %z) { define void @fcmp_ogt_fv_v2f64(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_fv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmflt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -1408,12 +1415,13 @@ define void @fcmp_ogt_fv_v2f64(ptr %x, double %y, ptr %z) { define void @fcmp_ogt_fv_v2f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_fv_v2f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmflt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 38df622998bf9..0516575bca0b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -3876,10 +3876,10 @@ define void @trunc_v8bf16(ptr %x) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -3901,10 +3901,10 @@ define void @trunc_v6bf16(ptr %x) { ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui 
a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -3941,10 +3941,10 @@ define void @trunc_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -3981,10 +3981,10 @@ define void @trunc_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4006,8 +4006,8 @@ define void @trunc_v4f32(ptr %x) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4049,11 +4049,11 @@ define void @ceil_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4076,11 +4076,11 @@ define void @ceil_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4103,9 +4103,9 @@ define void @ceil_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI177_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI177_0)(a1) +; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4120,11 +4120,11 @@ define void @ceil_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4147,9 +4147,9 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, 
%hi(.LCPI178_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) +; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4164,11 +4164,11 @@ define void @ceil_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4191,9 +4191,9 @@ define void @ceil_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4214,9 +4214,9 @@ define void @ceil_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI180_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI180_0)(a1) +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4237,11 +4237,11 @@ define void @floor_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4264,11 +4264,11 @@ define void @floor_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4291,9 +4291,9 @@ define void @floor_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI183_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI183_0)(a1) +; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4308,11 +4308,11 @@ define void @floor_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4335,9 +4335,9 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) +; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: 
fsrmi a1, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4352,11 +4352,11 @@ define void @floor_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4379,9 +4379,9 @@ define void @floor_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4402,9 +4402,9 @@ define void @floor_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI186_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI186_0)(a1) +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4425,11 +4425,11 @@ define void @round_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4452,11 +4452,11 @@ define void @round_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4479,9 +4479,9 @@ define void @round_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI189_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI189_0)(a1) +; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4496,11 +4496,11 @@ define void @round_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4523,9 +4523,9 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) +; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4540,11 +4540,11 @@ define void 
@round_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4567,9 +4567,9 @@ define void @round_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4590,9 +4590,9 @@ define void @round_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI192_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI192_0)(a1) +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4612,10 +4612,10 @@ define void @rint_v8bf16(ptr %x) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4652,10 +4652,10 @@ define void @rint_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -4677,8 +4677,8 @@ define void @rint_v4f32(ptr %x) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 -; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a1 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4720,11 +4720,11 @@ define void @nearbyint_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a1 @@ -4747,9 +4747,9 @@ define void @nearbyint_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI198_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI198_0)(a1) +; ZVFH-NEXT: frflags a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: frflags a1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: fsflags a1 @@ -4764,11 +4764,11 @@ define void @nearbyint_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v10, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: frflags a1 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v10, v8 ; ZVFHMIN-NEXT: vmflt.vf v0, v10, fa5 -; ZVFHMIN-NEXT: frflags a1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v10, v10, v0.t ; ZVFHMIN-NEXT: fsflags a1 @@ -4791,9 +4791,9 @@ define void @nearbyint_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 @@ -4814,9 +4814,9 @@ define void @nearbyint_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI200_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI200_0)(a1) +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll index be32c033fe373..4305c38cd2a0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll @@ -14,9 +14,9 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -37,9 +37,9 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -60,9 +60,9 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -83,9 +83,9 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -106,9 +106,9 @@ define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -130,9 +130,9 @@ define 
<32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -153,9 +153,9 @@ define <1 x float> @round_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -176,9 +176,9 @@ define <2 x float> @round_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -199,9 +199,9 @@ define <4 x float> @round_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -222,9 +222,9 @@ define <8 x float> @round_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -245,9 +245,9 @@ define <16 x float> @round_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -268,9 +268,9 @@ define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -291,9 +291,9 @@ define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -314,9 +314,9 @@ define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v 
v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -337,9 +337,9 @@ define <8 x double> @round_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll index 774ce5c7859c9..97c8cb4e57e5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -17,8 +17,8 @@ define <1 x half> @round_v1f16(<1 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,11 +31,11 @@ define <1 x half> @round_v1f16(<1 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -56,8 +56,8 @@ define <2 x half> @round_v2f16(<2 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -70,11 +70,11 @@ define <2 x half> @round_v2f16(<2 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -95,8 +95,8 @@ define <4 x half> @round_v4f16(<4 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -109,11 +109,11 @@ define <4 x half> @round_v4f16(<4 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, 
v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -134,8 +134,8 @@ define <8 x half> @round_v8f16(<8 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -148,11 +148,11 @@ define <8 x half> @round_v8f16(<8 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -173,8 +173,8 @@ define <16 x half> @round_v16f16(<16 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -187,11 +187,11 @@ define <16 x half> @round_v16f16(<16 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -211,12 +211,12 @@ define <32 x half> @round_v32f16(<32 x half> %x) { ; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: li a0, 32 +; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t -; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t @@ -226,15 +226,15 @@ define <32 x half> @round_v32f16(<32 x half> %x) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: li a0, 32 ; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t -; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t @@ -253,8 +253,8 @@ define <1 x float> @round_v1f32(<1 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,8 +273,8 @@ 
define <2 x float> @round_v2f32(<2 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -293,8 +293,8 @@ define <4 x float> @round_v4f32(<4 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -313,8 +313,8 @@ define <8 x float> @round_v8f32(<8 x float> %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -333,8 +333,8 @@ define <16 x float> @round_v16f32(<16 x float> %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -353,8 +353,8 @@ define <1 x double> @round_v1f64(<1 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -373,8 +373,8 @@ define <2 x double> @round_v2f64(<2 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -393,8 +393,8 @@ define <4 x double> @round_v4f64(<4 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -413,8 +413,8 @@ define <8 x double> @round_v8f64(<8 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll index 5c0279e133dfa..451969ed6b7f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -14,9 +14,9 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; 
CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -37,9 +37,9 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -60,9 +60,9 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -83,9 +83,9 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -106,9 +106,9 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -130,9 +130,9 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -153,9 +153,9 @@ define <1 x float> @roundeven_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -176,9 +176,9 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -199,9 +199,9 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; 
CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -222,9 +222,9 @@ define <8 x float> @roundeven_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -245,9 +245,9 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -268,9 +268,9 @@ define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -291,9 +291,9 @@ define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -314,9 +314,9 @@ define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -337,9 +337,9 @@ define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll index 0b6baad127643..83322648d9dd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -17,8 +17,8 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,11 +31,11 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -56,8 +56,8 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -70,11 +70,11 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -95,8 +95,8 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -109,11 +109,11 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -134,8 +134,8 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -148,11 +148,11 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -173,8 +173,8 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -187,11 +187,11 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, 
m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -211,12 +211,12 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) { ; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: li a0, 32 +; ZVFH-NEXT: fsrmi a1, 0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t -; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t @@ -226,15 +226,15 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: li a0, 32 ; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t -; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t @@ -253,8 +253,8 @@ define <1 x float> @roundeven_v1f32(<1 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,8 +273,8 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -293,8 +293,8 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -313,8 +313,8 @@ define <8 x float> @roundeven_v8f32(<8 x float> %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -333,8 +333,8 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -353,8 +353,8 @@ define <1 x double> 
@roundeven_v1f64(<1 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -373,8 +373,8 @@ define <2 x double> @roundeven_v2f64(<2 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -393,8 +393,8 @@ define <4 x double> @roundeven_v4f64(<4 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -413,8 +413,8 @@ define <8 x double> @roundeven_v8f64(<8 x double> %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll index fc7cd94ca3de8..b8fe251860ae6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -9,9 +9,9 @@ define <2 x i8> @fshr_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -26,9 +26,9 @@ define <2 x i8> @fshl_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -43,9 +43,9 @@ define <4 x i8> @fshr_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -60,9 +60,9 @@ define <4 x i8> @fshl_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; 
CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -77,9 +77,9 @@ define <8 x i8> @fshr_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -94,9 +94,9 @@ define <8 x i8> @fshl_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -111,9 +111,9 @@ define <16 x i8> @fshr_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -128,9 +128,9 @@ define <16 x i8> @fshl_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -145,9 +145,9 @@ define <32 x i8> @fshr_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vand.vi v14, v14, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -162,9 +162,9 @@ define <32 x i8> @fshl_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vand.vi v14, v14, 7, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -179,9 +179,9 @@ define <64 x i8> @fshr_v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v20, v16, v0.t +; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vand.vi v20, v20, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t ; CHECK-NEXT: vor.vv v8, 
v8, v12, v0.t ; CHECK-NEXT: ret @@ -196,9 +196,9 @@ define <64 x i8> @fshl_v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t ; CHECK-NEXT: vnot.v v20, v16, v0.t +; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vand.vi v20, v20, 7, v0.t ; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret @@ -213,9 +213,9 @@ define <2 x i16> @fshr_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -230,9 +230,9 @@ define <2 x i16> @fshl_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -247,9 +247,9 @@ define <4 x i16> @fshr_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -264,9 +264,9 @@ define <4 x i16> @fshl_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -281,9 +281,9 @@ define <8 x i16> @fshr_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -298,9 +298,9 @@ define <8 x i16> @fshl_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -315,9 +315,9 @@ define <16 x i16> @fshr_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 ; CHECK-NEXT: vsetvli zero, a0, 
e16, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vand.vi v14, v14, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -332,9 +332,9 @@ define <16 x i16> @fshl_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vand.vi v14, v14, 15, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -349,9 +349,9 @@ define <32 x i16> @fshr_v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v20, v16, v0.t +; CHECK-NEXT: vand.vi v16, v16, 15, v0.t ; CHECK-NEXT: vand.vi v20, v20, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vand.vi v16, v16, 15, v0.t ; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret @@ -366,9 +366,9 @@ define <32 x i16> @fshl_v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t ; CHECK-NEXT: vnot.v v20, v16, v0.t +; CHECK-NEXT: vand.vi v16, v16, 15, v0.t ; CHECK-NEXT: vand.vi v20, v20, 15, v0.t ; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vand.vi v16, v16, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret @@ -382,12 +382,12 @@ define <2 x i32> @fshr_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 %evl) @@ -400,12 +400,12 @@ define <2 x i32> @fshl_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 %evl) @@ -418,12 +418,12 @@ define <4 x i32> @fshr_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li 
a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 %evl) @@ -436,12 +436,12 @@ define <4 x i32> @fshl_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 %evl) @@ -454,12 +454,12 @@ define <8 x i32> @fshr_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 %evl) @@ -472,12 +472,12 @@ define <8 x i32> @fshl_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 %evl) @@ -490,12 +490,12 @@ define <16 x i32> @fshr_v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; 
CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 %evl) @@ -508,12 +508,12 @@ define <16 x i32> @fshl_v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 %evl) @@ -526,12 +526,12 @@ define <2 x i64> @fshr_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 %evl) @@ -544,12 +544,12 @@ define <2 x i64> @fshl_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 %evl) @@ -562,12 +562,12 @@ define <4 x i64> @fshr_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 %evl) @@ 
-580,12 +580,12 @@ define <4 x i64> @fshl_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 %evl) @@ -598,12 +598,12 @@ define <7 x i64> @fshr_v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call <7 x i64> @llvm.vp.fshr.v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> %m, i32 %evl) @@ -616,12 +616,12 @@ define <7 x i64> @fshl_v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call <7 x i64> @llvm.vp.fshl.v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> %m, i32 %evl) @@ -634,12 +634,12 @@ define <8 x i64> @fshr_v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 %evl) @@ -652,12 +652,12 @@ define <8 x i64> @fshl_v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx 
v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll index 2173887e85417..f183a4f9e5fb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -7,10 +7,10 @@ define <1 x half> @trunc_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: trunc_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -28,10 +28,10 @@ declare <1 x half> @llvm.experimental.constrained.trunc.v1f16(<1 x half>, metada define <2 x half> @trunc_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: trunc_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -49,10 +49,10 @@ declare <2 x half> @llvm.experimental.constrained.trunc.v2f16(<2 x half>, metada define <4 x half> @trunc_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: trunc_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -70,10 +70,10 @@ declare <4 x half> @llvm.experimental.constrained.trunc.v4f16(<4 x half>, metada define <8 x half> @trunc_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -91,10 +91,10 @@ declare <8 x half> @llvm.experimental.constrained.trunc.v8f16(<8 x half>, metada define <16 x half> @trunc_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: trunc_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -114,9 +114,9 @@ define <32 x 
half> @trunc_v32f16(<32 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -138,8 +138,8 @@ define <1 x float> @trunc_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -159,8 +159,8 @@ define <2 x float> @trunc_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -180,8 +180,8 @@ define <4 x float> @trunc_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -201,8 +201,8 @@ define <8 x float> @trunc_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t @@ -222,8 +222,8 @@ define <16 x float> @trunc_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t @@ -239,10 +239,10 @@ declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, m define <1 x double> @trunc_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: trunc_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -260,10 +260,10 @@ declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, me define <2 x double> @trunc_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -281,10 +281,10 
@@ declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, me define <4 x double> @trunc_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -302,10 +302,10 @@ declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, me define <8 x double> @trunc_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: trunc_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index a91263e85e9e8..db8fdb7fe1f26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -935,11 +935,13 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, ; RV32: # %bb.0: ; RV32-NEXT: li a0, 512 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma -; RV32-NEXT: vid.v v8 -; RV32-NEXT: vsrl.vi v8, v8, 3 -; RV32-NEXT: vadd.vi v0, v8, -1 +; RV32-NEXT: vid.v v12 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmv.v.i v8, 1 +; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma +; RV32-NEXT: vsrl.vi v12, v12, 3 +; RV32-NEXT: vadd.vi v0, v12, -1 +; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: ret ; @@ -947,11 +949,13 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, ; RV64V: # %bb.0: ; RV64V-NEXT: li a0, 512 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64V-NEXT: vid.v v8 -; RV64V-NEXT: vsrl.vi v8, v8, 2 -; RV64V-NEXT: vadd.vi v0, v8, -1 +; RV64V-NEXT: vid.v v12 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmv.v.i v8, 1 +; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma +; RV64V-NEXT: vsrl.vi v12, v12, 2 +; RV64V-NEXT: vadd.vi v0, v12, -1 +; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64V-NEXT: ret ; @@ -959,11 +963,13 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: li a0, 512 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; RV64ZVE32-NEXT: vid.v v8 -; RV64ZVE32-NEXT: vsrl.vi v8, v8, 3 -; RV64ZVE32-NEXT: vadd.vi v0, v8, -1 +; RV64ZVE32-NEXT: vid.v v12 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v8, 1 +; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma +; RV64ZVE32-NEXT: vsrl.vi v12, v12, 3 +; RV64ZVE32-NEXT: vadd.vi v0, v12, -1 +; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64ZVE32-NEXT: ret ret <512 x i8> @@ -985,18 +991,15 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: li a1, 15 +; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: 
vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vmerge.vim v13, v12, -1, v0 -; RV32-NEXT: slli a1, a1, 8 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmerge.vim v12, v12, -1, v0 ; RV32-NEXT: vmv1r.v v0, v13 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma -; RV32-NEXT: vmerge.vim v12, v12, -1, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 2, v0 ; RV32-NEXT: ret ; @@ -1007,22 +1010,22 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV64V-NEXT: vmv.v.i v12, 0 ; RV64V-NEXT: li a0, 512 ; RV64V-NEXT: vmerge.vim v13, v12, -1, v0 +; RV64V-NEXT: vmv.v.i v0, 12 +; RV64V-NEXT: li a1, 48 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmv.v.i v8, 3 -; RV64V-NEXT: vmv1r.v v0, v13 -; RV64V-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64V-NEXT: vmv.v.i v0, 12 -; RV64V-NEXT: vmerge.vim v13, v12, -1, v0 -; RV64V-NEXT: li a1, 48 -; RV64V-NEXT: vmv.v.v v0, v13 +; RV64V-NEXT: vmerge.vim v14, v12, -1, v0 +; RV64V-NEXT: vmv1r.v v0, v13 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64V-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64V-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64V-NEXT: vmv.s.x v0, a1 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vmerge.vim v12, v12, -1, v0 -; RV64V-NEXT: vmv.v.v v0, v12 +; RV64V-NEXT: vmv1r.v v0, v14 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV64V-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64V-NEXT: vmv1r.v v0, v12 ; RV64V-NEXT: vmerge.vim v8, v8, 2, v0 ; RV64V-NEXT: ret ; @@ -1041,18 +1044,15 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v0, a1 ; RV64ZVE32-NEXT: li a1, 15 +; RV64ZVE32-NEXT: slli a1, a1, 8 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v13, v12, -1, v0 -; RV64ZVE32-NEXT: slli a1, a1, 8 -; RV64ZVE32-NEXT: vmv.v.v v0, v13 -; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64ZVE32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v0, a1 -; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v12, v12, -1, v0 -; RV64ZVE32-NEXT: vmv.v.v v0, v12 +; RV64ZVE32-NEXT: vmv.v.v v0, v13 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV64ZVE32-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64ZVE32-NEXT: vmv1r.v v0, v12 ; RV64ZVE32-NEXT: vmerge.vim v8, v8, 2, v0 ; RV64ZVE32-NEXT: ret ret <512 x i8> @@ -1357,15 +1357,13 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32-ONLY-NEXT: lbu t2, 9(a0) ; RV32-ONLY-NEXT: lbu t3, 10(a0) ; RV32-ONLY-NEXT: lbu t4, 11(a0) -; RV32-ONLY-NEXT: li t5, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, t5 ; RV32-ONLY-NEXT: lbu t5, 12(a0) ; RV32-ONLY-NEXT: lbu t6, 13(a0) ; RV32-ONLY-NEXT: lbu s0, 14(a0) ; RV32-ONLY-NEXT: lbu a0, 15(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 +; RV32-ONLY-NEXT: li a1, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t2 @@ -1381,6 +1379,9 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx 
v9, v9, s0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: .cfi_restore s0 @@ -1492,15 +1493,13 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64V-ONLY-NEXT: lbu t2, 9(a0) ; RV64V-ONLY-NEXT: lbu t3, 10(a0) ; RV64V-ONLY-NEXT: lbu t4, 11(a0) -; RV64V-ONLY-NEXT: li t5, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, t5 ; RV64V-ONLY-NEXT: lbu t5, 12(a0) ; RV64V-ONLY-NEXT: lbu t6, 13(a0) ; RV64V-ONLY-NEXT: lbu s0, 14(a0) ; RV64V-ONLY-NEXT: lbu a0, 15(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 +; RV64V-ONLY-NEXT: li a1, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t2 @@ -1516,6 +1515,9 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: .cfi_restore s0 @@ -1629,15 +1631,13 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64ZVE32-NEXT: lbu t2, 9(a0) ; RV64ZVE32-NEXT: lbu t3, 10(a0) ; RV64ZVE32-NEXT: lbu t4, 11(a0) -; RV64ZVE32-NEXT: li t5, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, t5 ; RV64ZVE32-NEXT: lbu t5, 12(a0) ; RV64ZVE32-NEXT: lbu t6, 13(a0) ; RV64ZVE32-NEXT: lbu s0, 14(a0) ; RV64ZVE32-NEXT: lbu a0, 15(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: li a1, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t2 @@ -1653,6 +1653,9 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, s0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: .cfi_restore s0 @@ -1731,15 +1734,13 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32-ONLY-NEXT: lbu t2, 154(a0) ; RV32-ONLY-NEXT: lbu t3, 161(a0) ; RV32-ONLY-NEXT: lbu t4, 163(a0) -; RV32-ONLY-NEXT: li t5, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, t5 ; RV32-ONLY-NEXT: lbu t5, 93(a0) ; RV32-ONLY-NEXT: lbu t6, 105(a0) ; RV32-ONLY-NEXT: lbu s0, 124(a0) ; RV32-ONLY-NEXT: lbu a0, 144(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 +; RV32-ONLY-NEXT: li a1, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, 
v9, t5 @@ -1755,6 +1756,9 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, t2 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: .cfi_restore s0 @@ -1866,15 +1870,13 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64V-ONLY-NEXT: lbu t2, 154(a0) ; RV64V-ONLY-NEXT: lbu t3, 161(a0) ; RV64V-ONLY-NEXT: lbu t4, 163(a0) -; RV64V-ONLY-NEXT: li t5, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, t5 ; RV64V-ONLY-NEXT: lbu t5, 93(a0) ; RV64V-ONLY-NEXT: lbu t6, 105(a0) ; RV64V-ONLY-NEXT: lbu s0, 124(a0) ; RV64V-ONLY-NEXT: lbu a0, 144(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 +; RV64V-ONLY-NEXT: li a1, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t5 @@ -1890,6 +1892,9 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, t2 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: .cfi_restore s0 @@ -2011,15 +2016,13 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64ZVE32-NEXT: lbu t2, 154(a0) ; RV64ZVE32-NEXT: lbu t3, 161(a0) ; RV64ZVE32-NEXT: lbu t4, 163(a0) -; RV64ZVE32-NEXT: li t5, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, t5 ; RV64ZVE32-NEXT: lbu t5, 93(a0) ; RV64ZVE32-NEXT: lbu t6, 105(a0) ; RV64ZVE32-NEXT: lbu s0, 124(a0) ; RV64ZVE32-NEXT: lbu a0, 144(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: li a1, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t5 @@ -2035,6 +2038,9 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, t2 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: .cfi_restore s0 @@ -2503,15 +2509,13 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32-ONLY-NEXT: lbu a3, 44(a0) ; RV32-ONLY-NEXT: lbu a4, 55(a0) ; RV32-ONLY-NEXT: lbu a5, 75(a0) -; RV32-ONLY-NEXT: li a6, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, a6 ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 105(a0) ; RV32-ONLY-NEXT: lbu a0, 161(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a2 +; RV32-ONLY-NEXT: li a2, 255 ; 
RV32-ONLY-NEXT: vmv.v.x v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2521,6 +2525,9 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a2 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: ret ; @@ -2590,15 +2597,13 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64V-ONLY-NEXT: lbu a3, 44(a0) ; RV64V-ONLY-NEXT: lbu a4, 55(a0) ; RV64V-ONLY-NEXT: lbu a5, 75(a0) -; RV64V-ONLY-NEXT: li a6, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, a6 ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 105(a0) ; RV64V-ONLY-NEXT: lbu a0, 161(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a2 +; RV64V-ONLY-NEXT: li a2, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2608,6 +2613,9 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a2 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ret ; @@ -2677,15 +2685,13 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64ZVE32-NEXT: lbu a3, 44(a0) ; RV64ZVE32-NEXT: lbu a4, 55(a0) ; RV64ZVE32-NEXT: lbu a5, 75(a0) -; RV64ZVE32-NEXT: li a6, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, a6 ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 105(a0) ; RV64ZVE32-NEXT: lbu a0, 161(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a2 +; RV64ZVE32-NEXT: li a2, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7 @@ -2695,6 +2701,9 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a2 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ret %p4 = getelementptr i8, ptr %p, i32 31 @@ -2740,13 +2749,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 124(a0) -; RV32-ONLY-NEXT: li t1, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, t1 ; RV32-ONLY-NEXT: lbu t1, 144(a0) ; RV32-ONLY-NEXT: lbu a0, 154(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 +; RV32-ONLY-NEXT: li a1, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7 
@@ -2760,6 +2767,9 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: ret ; @@ -2834,13 +2844,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 124(a0) -; RV64V-ONLY-NEXT: li t1, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, t1 ; RV64V-ONLY-NEXT: lbu t1, 144(a0) ; RV64V-ONLY-NEXT: lbu a0, 154(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 +; RV64V-ONLY-NEXT: li a1, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2854,6 +2862,9 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ret ; @@ -2930,13 +2941,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 124(a0) -; RV64ZVE32-NEXT: li t1, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, t1 ; RV64ZVE32-NEXT: lbu t1, 144(a0) ; RV64ZVE32-NEXT: lbu a0, 154(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: li a1, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7 @@ -2950,6 +2959,9 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -3010,13 +3022,13 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 % ; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a0 ; RV32-ONLY-NEXT: vmv.v.x v9, a4 -; RV32-ONLY-NEXT: vmv.v.i v0, 15 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a7 +; RV32-ONLY-NEXT: vmv.v.i v0, 15 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV32-ONLY-NEXT: ret ; @@ -3063,13 +3075,13 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 % ; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a0 ; RV64V-ONLY-NEXT: vmv.v.x v9, a4 -; 
RV64V-ONLY-NEXT: vmv.v.i v0, 15 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a7 +; RV64V-ONLY-NEXT: vmv.v.i v0, 15 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64V-ONLY-NEXT: ret ; @@ -3118,13 +3130,13 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 % ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a0 ; RV64ZVE32-NEXT: vmv.v.x v9, a4 -; RV64ZVE32-NEXT: vmv.v.i v0, 15 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a7 +; RV64ZVE32-NEXT: vmv.v.i v0, 15 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 3bb5e179e0d06..766d1db79f3ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1059,24 +1059,24 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-NEXT: lui a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vxm v11, v9, a2, v0 -; CHECK-NEXT: addi a2, a1, 32 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: li a2, 513 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, 4 -; CHECK-NEXT: addi a1, a1, 78 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: addi a2, a1, 32 +; CHECK-NEXT: addi a1, a1, 78 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vv v9, v8, v9 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v12, v12, 3, v0 +; CHECK-NEXT: vsrl.vv v9, v8, v9 ; CHECK-NEXT: vmulhu.vv v9, v9, v10 -; CHECK-NEXT: vmerge.vim v10, v12, 3, v0 ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a1, a1, 304 ; CHECK-NEXT: vsub.vv v8, v8, v9 @@ -1085,7 +1085,7 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 2, v0 +; CHECK-NEXT: vmerge.vim v9, v12, 2, v0 ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret @@ -1103,20 +1103,20 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 1 -; CHECK-NEXT: li a1, 33 -; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 3 -; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vmerge.vim v11, v11, 2, v0 +; CHECK-NEXT: vle16.v v11, (a1) +; CHECK-NEXT: li a1, 33 +; CHECK-NEXT: vmv.v.i v12, 3 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmerge.vim v12, 
v12, 2, v0 ; CHECK-NEXT: vmv1r.v v13, v9 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vmulhu.vv v9, v9, v11 ; CHECK-NEXT: lui a1, 1048568 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v13, a1 @@ -1125,9 +1125,9 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vmulhu.vv v8, v8, v13 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 6 +; CHECK-NEXT: vslideup.vi v12, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vsrl.vv v8, v8, v11 +; CHECK-NEXT: vsrl.vv v8, v8, v12 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -1247,9 +1247,9 @@ define void @mulhs_v16i8(ptr %x) { ; CHECK-NEXT: li a1, 57 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0 -; CHECK-NEXT: vmv.v.i v10, 7 ; CHECK-NEXT: vmulhu.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret @@ -3157,14 +3157,14 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK-NEXT: lui a2, %hi(.LCPI181_0) ; CHECK-NEXT: addi a2, a2, %lo(.LCPI181_0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a2) +; CHECK-NEXT: vle8.v v10, (a2) ; CHECK-NEXT: lui a1, 163907 ; CHECK-NEXT: addi a1, a1, -2044 ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: li a1, -128 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmerge.vxm v8, v12, a1, v0 ; CHECK-NEXT: lui a1, 66049 ; CHECK-NEXT: addi a1, a1, 32 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -3173,29 +3173,29 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK-NEXT: vle8.v v14, (a0) ; CHECK-NEXT: addi a1, a1, 513 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 66785 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v16, 4 +; CHECK-NEXT: vsrl.vv v12, v14, v12 +; CHECK-NEXT: vmulhu.vv v10, v12, v10 +; CHECK-NEXT: vmv.v.i v12, 4 ; CHECK-NEXT: addi a1, a1, 78 -; CHECK-NEXT: vsrl.vv v10, v14, v10 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmulhu.vv v8, v10, v8 -; CHECK-NEXT: vmerge.vim v10, v16, 3, v0 +; CHECK-NEXT: vmerge.vim v12, v12, 3, v0 ; CHECK-NEXT: lui a1, 529160 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsub.vv v14, v14, v8 -; CHECK-NEXT: vmulhu.vv v12, v14, v12 +; CHECK-NEXT: vsub.vv v14, v14, v10 +; CHECK-NEXT: vmulhu.vv v8, v14, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vadd.vv v8, v12, v8 -; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vmerge.vim v10, v12, 2, v0 ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret @@ -3209,40 +3209,36 @@ define void @mulhu_v16i16(ptr %x) { ; RV32-LABEL: mulhu_v16i16: ; RV32: # 
%bb.0: ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v10, (a0) +; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a1, 257 -; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: lui a1, %hi(.LCPI182_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: lui a1, 1048568 -; RV32-NEXT: vmerge.vxm v12, v8, a1, v0 -; RV32-NEXT: lui a1, 4 -; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: addi a1, a1, 64 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; RV32-NEXT: lui a1, 2 ; RV32-NEXT: addi a1, a1, 289 -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: lui a1, 4 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v16, v9, 1, v0 +; RV32-NEXT: vmv.v.i v14, 0 +; RV32-NEXT: vmv.v.i v15, 3 +; RV32-NEXT: addi a1, a1, 64 +; RV32-NEXT: vmerge.vim v15, v15, 2, v0 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: lui a1, %hi(.LCPI182_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 3 -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 -; RV32-NEXT: vle16.v v14, (a1) -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v17, v9, 1, v0 +; RV32-NEXT: vmerge.vim v16, v14, 1, v0 +; RV32-NEXT: vmerge.vim v17, v15, 1, v0 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vsext.vf2 v8, v16 -; RV32-NEXT: vsrl.vv v8, v10, v8 -; RV32-NEXT: vmulhu.vv v8, v8, v14 -; RV32-NEXT: vsub.vv v10, v10, v8 -; RV32-NEXT: vmulhu.vv v10, v10, v12 -; RV32-NEXT: vadd.vv v8, v10, v8 +; RV32-NEXT: vsext.vf2 v14, v16 +; RV32-NEXT: vsrl.vv v14, v8, v14 +; RV32-NEXT: vmulhu.vv v12, v14, v12 +; RV32-NEXT: vsub.vv v8, v8, v12 +; RV32-NEXT: vmulhu.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v12 ; RV32-NEXT: vsext.vf2 v10, v17 ; RV32-NEXT: vsrl.vv v8, v8, v10 ; RV32-NEXT: vse16.v v8, (a0) @@ -3383,20 +3379,21 @@ define void @mulhs_v32i8(ptr %x) { ; CHECK-LABEL: mulhs_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: lui a2, 304453 -; CHECK-NEXT: addi a2, a2, -1452 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: li a2, -123 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v10, 7 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vmv.v.x v12, a2 +; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: lui a1, 304453 +; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vmv.v.i v12, 7 +; CHECK-NEXT: addi a1, a1, -1452 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: li a1, 57 -; CHECK-NEXT: vmerge.vxm v12, v12, a1, v0 -; CHECK-NEXT: vmulhu.vv v8, v8, v12 -; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-NEXT: vmulhu.vv v8, v10, v8 +; CHECK-NEXT: vsrl.vv v8, v8, v12 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index b65352aed2d52..cb77b1d4f4973 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -24,38 +24,35 @@ define <4 x 
i1> @load_large_vector(ptr %p) { ; ZVE32X-NEXT: xor a0, a7, a0 ; ZVE32X-NEXT: snez a3, a3 ; ZVE32X-NEXT: snez a1, a1 +; ZVE32X-NEXT: snez a2, a2 +; ZVE32X-NEXT: snez a0, a0 ; ZVE32X-NEXT: vmv.s.x v10, a3 ; ZVE32X-NEXT: vmv.s.x v11, a1 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v10, v10, 1 +; ZVE32X-NEXT: vand.vi v11, v11, 1 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: vand.vi v10, v11, 1 -; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 -; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: snez a1, a2 -; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v10, v9, 1, v0 -; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v10, v11, 1 -; ZVE32X-NEXT: vmv.s.x v11, a1 -; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v10, v11, 1 +; ZVE32X-NEXT: vmerge.vim v10, v8, 1, v0 +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmerge.vim v11, v9, 1, v0 +; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; ZVE32X-NEXT: vslideup.vi v11, v10, 1 +; ZVE32X-NEXT: vmv.s.x v10, a2 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: snez a0, a0 ; ZVE32X-NEXT: vmerge.vim v10, v8, 1, v0 +; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: vmerge.vim v11, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma ; ZVE32X-NEXT: vslideup.vi v11, v10, 2 ; ZVE32X-NEXT: vmv.s.x v10, a0 -; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vmerge.vim v9, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 61bafd3cfeeef..f5e289bb9ce97 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -183,373 +183,298 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 96 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 6 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 96 * vlenb -; RV32-NEXT: addi a4, a1, 128 -; RV32-NEXT: addi a5, a1, 256 +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb +; RV32-NEXT: addi a3, a1, 128 ; RV32-NEXT: li a2, 32 -; RV32-NEXT: lui a3, 12 +; RV32-NEXT: lui a4, 12291 +; RV32-NEXT: lui a5, %hi(.LCPI8_0) +; RV32-NEXT: addi a5, a5, %lo(.LCPI8_0) +; RV32-NEXT: lui a6, 49164 +; RV32-NEXT: lui a7, %hi(.LCPI8_1) +; RV32-NEXT: addi a7, a7, %lo(.LCPI8_1) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v16, (a5) -; RV32-NEXT: lui a5, 12291 -; RV32-NEXT: vmv.s.x v3, a3 -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 80 -; RV32-NEXT: mul a1, a1, a6 -; 
RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vslideup.vi v8, v16, 4 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a1, a1, a6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v16, v16, 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 88 -; RV32-NEXT: mul a1, a1, a6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 88 -; RV32-NEXT: mul a1, a1, a6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 68 -; RV32-NEXT: mul a1, a1, a6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v8, (a4) -; RV32-NEXT: addi a5, a5, 3 -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 56 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: li t1, 56 +; RV32-NEXT: mul t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 16 +; RV32-NEXT: vs8r.v v16, (t0) # Unknown-size Folded Spill +; RV32-NEXT: vle32.v v8, (a3) +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, a4, 3 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vslideup.vi v4, v16, 2 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v24, v8, v24, v0 -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 88 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vslideup.vi v4, v16, 8, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, %hi(.LCPI8_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0) -; RV32-NEXT: lui a4, 49164 -; RV32-NEXT: lui a5, %hi(.LCPI8_1) -; RV32-NEXT: addi a5, a5, %lo(.LCPI8_1) -; RV32-NEXT: vle16.v v6, (a1) -; RV32-NEXT: addi a4, a4, 12 -; RV32-NEXT: vle16.v v4, (a5) +; RV32-NEXT: vle16.v v24, (a5) +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs2r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a6, a6, 12 +; RV32-NEXT: vle16.v v24, (a7) +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs2r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v24, v6 
-; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 72 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 80 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.s.x v24, a6 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v4 -; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: vmerge.vvm v24, v8, v16, v0 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a5, 196656 -; RV32-NEXT: lui a1, %hi(.LCPI8_2) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2) -; RV32-NEXT: lui a6, 3 -; RV32-NEXT: lui a7, 786624 -; RV32-NEXT: lui t0, 768 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v0, v24, v8 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 -; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: addi a6, a6, 3 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li t1, 80 -; RV32-NEXT: mul a5, a5, t1 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li t1, 72 -; RV32-NEXT: mul a5, a5, t1 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v0, a6 -; RV32-NEXT: addi a5, a7, 192 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: li a7, 88 -; RV32-NEXT: mul a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: li a7, 56 -; RV32-NEXT: mul a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v4, v8, v24, v0 -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: addi a3, a3, 12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 80 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 72 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; 
RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: addi a3, t0, 768 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 88 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 28 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vvm v24, v8, v16, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl2r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v0, v24, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 196656 +; RV32-NEXT: lui a4, %hi(.LCPI8_2) +; RV32-NEXT: addi a4, a4, %lo(.LCPI8_2) +; RV32-NEXT: vle16.v v16, (a4) +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs2r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: lui a3, 3073 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 80 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 72 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle16.v v28, (a1) -; RV32-NEXT: addi a1, a3, -1024 -; RV32-NEXT: vmv4r.v v8, v24 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 88 +; RV32-NEXT: li a4, 56 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v24, v8, v16, v0 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add 
a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs4r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vl2r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v0, v24, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 786624 +; RV32-NEXT: lui a4, 768 +; RV32-NEXT: lui a5, 3073 +; RV32-NEXT: addi a3, a3, 192 +; RV32-NEXT: addi a4, a4, 768 +; RV32-NEXT: addi a5, a5, -1024 +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: vmv.s.x v6, a4 +; RV32-NEXT: vmv.s.x v7, a5 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 56 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v24, v8, v16, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vmerge.vvm v24, v8, v16, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, a1, 256 +; RV32-NEXT: lui a2, 12 +; RV32-NEXT: lui a3, 3 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a5, 192 +; RV32-NEXT: addi a3, a3, 3 +; RV32-NEXT: addi a6, a2, 12 +; RV32-NEXT: vmv.s.x v7, a4 +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vmv.s.x v6, a5 +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: vmv.s.x v5, a6 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 16 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vmerge.vvm v20, v16, v8, v0 +; RV32-NEXT: vmv1r.v v0, v5 +; RV32-NEXT: vmerge.vvm v28, v16, v8, v0 +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmerge.vvm v12, v16, v8, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v16, v28 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vmerge.vvm v12, v16, v8, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, 
(a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vslideup.vi v24, v8, 4 +; RV32-NEXT: vslideup.vi v4, v8, 2 ; RV32-NEXT: lui a1, %hi(.LCPI8_3) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_3) -; RV32-NEXT: li a2, 192 ; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vslideup.vi v24, v16, 10, v0.t ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v12, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 88 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 88 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 68 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vslideup.vi v4, v16, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v16, v24 +; RV32-NEXT: vmv.v.v v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 68 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v16, v24 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v16, v20, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v20, v4, v12 -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vmv.v.v v16, v8 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI8_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4) ; RV32-NEXT: lui a2, %hi(.LCPI8_5) ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v16, (a1) +; RV32-NEXT: vle16.v v24, (a1) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v24, (a2) 
+; RV32-NEXT: vle16.v v8, (a2) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI8_6) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle16.v v18, (a1) +; RV32-NEXT: vle16.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 28 +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v0, v12, v24 +; RV32-NEXT: vrgatherei16.vv v24, v28, v12 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v0, v8 +; RV32-NEXT: vmv.v.v v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl2r.v v28, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v24, v18 +; RV32-NEXT: vrgatherei16.vv v8, v16, v28 ; RV32-NEXT: lui a1, %hi(.LCPI8_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7) ; RV32-NEXT: lui a2, %hi(.LCPI8_8) @@ -561,84 +486,66 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v14, (a2) ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vs2r.v v14, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vle16.v v13, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v13, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v1, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v16, v4, v12 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v28, v16, v12 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v16, v8 +; RV32-NEXT: vmv.v.v v28, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; 
RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl2r.v v6, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v24, v6 +; RV32-NEXT: vrgatherei16.vv v8, v16, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 88 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v24, v28 +; RV32-NEXT: vrgatherei16.vv v12, v16, v1 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vse32.v v12, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v16, (a1) +; RV32-NEXT: vse32.v v28, (a1) ; RV32-NEXT: addi a1, a0, 192 -; RV32-NEXT: vse32.v v0, (a1) +; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vse32.v v20, (a1) -; RV32-NEXT: addi a1, a0, 64 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 6 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a1, a0, 64 +; RV32-NEXT: vse32.v v4, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 68 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 96 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -650,467 +557,378 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 93 +; RV64-NEXT: li a3, 66 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdd, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 93 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 66 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 22 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: addi a3, a1, 256 -; RV64-NEXT: li a4, 128 -; RV64-NEXT: lui a1, 1 -; RV64-NEXT: vle64.v v8, (a3) -; RV64-NEXT: lui a3, %hi(.LCPI8_0) -; RV64-NEXT: addi a3, a3, %lo(.LCPI8_0) -; RV64-NEXT: vmv.s.x v0, a4 +; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a3, a1, 
128 +; RV64-NEXT: lui a2, 1 +; RV64-NEXT: lui a4, %hi(.LCPI8_0) +; RV64-NEXT: addi a4, a4, %lo(.LCPI8_0) +; RV64-NEXT: vle64.v v16, (a3) +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a5, 58 +; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: lui a3, 2 +; RV64-NEXT: vle16.v v8, (a4) ; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a5, 61 +; RV64-NEXT: li a5, 54 ; RV64-NEXT: mul a4, a4, a5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; RV64-NEXT: addi a4, a1, 65 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vslideup.vi v24, v8, 2 -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 8 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 77 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 16 -; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 77 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 16 -; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v24, v16, 5, v0.t -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 73 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 16 -; RV64-NEXT: vs4r.v v24, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a5, 85 -; RV64-NEXT: mul a2, a2, a5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle16.v v12, (a3) +; RV64-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: lui a4, %hi(.LCPI8_1) +; RV64-NEXT: addi a4, a4, %lo(.LCPI8_1) +; RV64-NEXT: vle16.v v8, (a4) +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 30 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 16 +; RV64-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a4, a2, 65 +; RV64-NEXT: addi a3, a3, 130 ; RV64-NEXT: vmv.s.x v0, a4 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v24, v24, v16, v0 -; RV64-NEXT: vrgatherei16.vv v0, v24, v12 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 37 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v12, v8, 1 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 61 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl1r.v v7, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v12, v24, 4, v0.t -; RV64-NEXT: csrr a2, vlenb -; 
RV64-NEXT: li a3, 69 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill -; RV64-NEXT: lui a2, 2 -; RV64-NEXT: lui a3, 4 -; RV64-NEXT: li a4, 32 -; RV64-NEXT: addi a2, a2, 130 -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: addi a2, a3, 260 -; RV64-NEXT: vmv8r.v v24, v16 +; RV64-NEXT: vmv.s.x v8, a3 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a5, 85 -; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: li a4, 38 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 -; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: vs1r.v v8, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: li a4, 54 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 -; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vmv.s.x v2, a4 -; RV64-NEXT: vmv4r.v v12, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 29 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v2 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 45 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v12, v8, 5, v0.t -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 6 -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vslidedown.vi v12, v8, 1 -; RV64-NEXT: vmv1r.v v0, v2 -; RV64-NEXT: vslideup.vi v12, v8, 4, v0.t -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vrgather.vi v12, v24, 5, v0.t -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 25 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill -; RV64-NEXT: lui a2, 8 -; RV64-NEXT: addi a2, a2, 520 -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vslideup.vi v12, v24, 6 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 4 -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: 
vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t -; RV64-NEXT: lui a2, %hi(.LCPI8_1) -; RV64-NEXT: addi a2, a2, %lo(.LCPI8_1) -; RV64-NEXT: li a3, 192 -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v6, (a2) -; RV64-NEXT: vmv.s.x v0, a3 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 45 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v28, v16, 2 -; RV64-NEXT: vmerge.vvm v16, v28, v12, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 61 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v16, v6 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: lui a2, %hi(.LCPI8_2) -; RV64-NEXT: addi a2, a2, %lo(.LCPI8_2) -; RV64-NEXT: li a3, 1040 -; RV64-NEXT: vmv.s.x v0, a3 -; RV64-NEXT: addi a1, a1, -2016 +; RV64-NEXT: vl2r.v v24, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v8, v24 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 46 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 85 +; RV64-NEXT: li a4, 38 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 22 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 30 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl2r.v v16, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v8, v16 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 38 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV64-NEXT: lui a3, 4 +; RV64-NEXT: lui a4, %hi(.LCPI8_2) +; RV64-NEXT: addi a4, a4, %lo(.LCPI8_2) +; RV64-NEXT: vle16.v v8, (a4) +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 54 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 16 +; RV64-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a3, a3, 260 +; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 53 +; RV64-NEXT: li a4, 58 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi 
a3, a3, 16 ; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: li a4, 54 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 16 -; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vle16.v v6, (a2) -; RV64-NEXT: li a1, 64 -; RV64-NEXT: vmerge.vvm v8, v24, v16, v0 +; RV64-NEXT: vl2r.v v16, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v8, v16 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 30 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, a1, 256 +; RV64-NEXT: lui a3, 8 +; RV64-NEXT: li a4, 1040 +; RV64-NEXT: addi a3, a3, 520 +; RV64-NEXT: addi a2, a2, -2016 +; RV64-NEXT: vmv.s.x v7, a4 +; RV64-NEXT: vmv.s.x v0, a3 +; RV64-NEXT: vmv.s.x v6, a2 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 58 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 +; RV64-NEXT: li a3, 6 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv8r.v v16, v24 +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 58 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v24, v16, v6 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v24, v24, v16, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 14 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v28, v8 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v28, v8, 5, v0.t +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 58 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v16, v24, v16, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 +; RV64-NEXT: li a2, 22 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v0 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv4r.v v16, v8 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vslideup.vi v16, v8, 5, v0.t +; RV64-NEXT: vmv.v.v v4, v16 +; RV64-NEXT: vslidedown.vi v16, v8, 1 +; RV64-NEXT: vslideup.vi v16, v8, 4, v0.t +; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vslideup.vi v16, v8, 2 +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; 
RV64-NEXT: vslidedown.vi v24, v8, 8 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vslideup.vi v16, v24, 5, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 58 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vslideup.vi v12, v8, 1 +; RV64-NEXT: vslideup.vi v12, v24, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 54 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v4, v24, 4, v0.t +; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t +; RV64-NEXT: vslideup.vi v12, v24, 6 +; RV64-NEXT: vslideup.vi v12, v24, 1, v0.t ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 64 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv4r.v v12, v24 +; RV64-NEXT: vslideup.vi v12, v24, 5, v0.t ; RV64-NEXT: lui a1, %hi(.LCPI8_3) ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3) ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v20, (a1) -; RV64-NEXT: lui a1, %hi(.LCPI8_4) -; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4) -; RV64-NEXT: vle16.v v8, (a1) +; RV64-NEXT: vle16.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs2r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 46 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 58 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v28, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 58 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v0, v8, v20 +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 +; RV64-NEXT: li a2, 38 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v0 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size 
Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 54 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v28, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 54 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v0, v16, v8 -; RV64-NEXT: lui a1, %hi(.LCPI8_5) -; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) -; RV64-NEXT: vle16.v v20, (a1) +; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 30 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v0 +; RV64-NEXT: vmv.v.v v4, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 46 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 6 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v24, v0, v16 +; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v20, v24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 38 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 192 +; RV64-NEXT: lui a2, %hi(.LCPI8_4) +; RV64-NEXT: addi a2, a2, %lo(.LCPI8_4) +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vle16.v v2, (a2) +; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v8, v0, 3 +; RV64-NEXT: vrgather.vi v24, v8, 2 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v4, v24, v16, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v8, v8, v28, v0 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v24, v16, v2 +; RV64-NEXT: lui a1, %hi(.LCPI8_5) +; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) +; RV64-NEXT: vle16.v v28, (a1) +; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v4, v24 +; RV64-NEXT: vmv4r.v v16, v4 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vrgather.vi v24, v8, 3 +; RV64-NEXT: vmerge.vvm v24, v24, v12, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 85 +; RV64-NEXT: li a2, 22 ; 
RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v20 +; RV64-NEXT: vrgatherei16.vv v8, v0, v28 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v24, v8 ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vse64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 320 +; RV64-NEXT: vse64.v v24, (a1) +; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 61 +; RV64-NEXT: li a3, 38 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v20, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v20, (a1) -; RV64-NEXT: addi a1, a0, 320 +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: addi a1, a0, 192 -; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 6 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: li a3, 46 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 +; RV64-NEXT: li a3, 54 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 58 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 93 +; RV64-NEXT: li a1, 66 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index c29ccd45528b8..b11df313bc2d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -141,8 +141,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -151,8 +151,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1: ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmv.v.x v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -245,10 +245,10 @@ define <8 x i1> @buildvec_mask_v8i1() { define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 19 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmerge.vxm 
v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -256,10 +256,10 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a2, 19 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a2 ; ZVE32F-NEXT: vmv.v.x v8, a1 +; ZVE32F-NEXT: li a1, 19 +; ZVE32F-NEXT: vmv.s.x v0, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -282,12 +282,12 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vslide1down.vx v9, v8, a0 ; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, zero ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -299,12 +299,12 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; ZVE32F-NEXT: vmv.v.x v8, a0 ; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 ; ZVE32F-NEXT: li a0, 1 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a0 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -327,12 +327,12 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vslide1down.vx v9, v8, a0 ; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, zero ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -344,12 +344,12 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; ZVE32F-NEXT: vmv.v.x v8, a0 ; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 ; ZVE32F-NEXT: li a0, 1 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a0 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -370,13 +370,13 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v9, v8, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -386,13 +386,13 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: 
vsetivli zero, 8, e8, mf2, ta, mu ; ZVE32F-NEXT: vmv.v.x v8, a0 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 979785dd2c024..84486a96873d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -24,11 +24,11 @@ define void @splat_zeros_v2i1(ptr %x) { define void @splat_v1i1(ptr %x, i1 %y) { ; CHECK-LABEL: splat_v1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 @@ -48,11 +48,11 @@ define void @splat_v1i1_icmp(ptr %x, i32 signext %y, i32 signext %z) { ; CHECK-LABEL: splat_v1i1_icmp: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a1, a1, a2 -; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 @@ -84,9 +84,9 @@ define void @splat_v4i1(ptr %x, i1 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 76590d47a3230..be2bd61baf0e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -13744,11 +13744,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V-NEXT: vslidedown.vi v8, v8, 16 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64V-NEXT: vslidedown.vi v0, v0, 2 -; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -14278,8 +14278,8 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV32: # %bb.0: ; RV32-NEXT: li a1, -512 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.i v0, 5 ; 
RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vluxei32.v v8, (a0), v8 ; RV32-NEXT: ret @@ -14287,10 +14287,11 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV64V-LABEL: mgather_narrow_edge_case: ; RV64V: # %bb.0: ; RV64V-NEXT: li a1, -512 +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64V-NEXT: vmv.v.i v0, 5 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vmerge.vim v10, v8, 0, v0 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64V-NEXT: vluxei64.v v8, (a0), v10 @@ -14301,8 +14302,8 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV64ZVE32F-NEXT: lw a1, -512(a0) ; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.v.i v0, 5 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vmv.v.i v0, 5 ; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> @@ -14374,8 +14375,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: vslide1down.vx v9, v9, a5 ; RV32-NEXT: vslide1down.vx v10, v8, a3 ; RV32-NEXT: vslide1down.vx v8, v9, a6 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV32-NEXT: ret ; @@ -14449,8 +14450,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: vmv.v.x v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-NEXT: vslide1down.vx v8, v8, a1 -; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64V-NEXT: addi sp, s0, -128 ; RV64V-NEXT: .cfi_def_cfa sp, 128 @@ -14481,31 +14482,31 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: lbu t1, 20(a0) ; RV64ZVE32F-NEXT: lbu t2, 21(a0) ; RV64ZVE32F-NEXT: slli a6, a6, 8 -; RV64ZVE32F-NEXT: or a5, a6, a5 ; RV64ZVE32F-NEXT: slli t0, t0, 8 ; RV64ZVE32F-NEXT: slli a4, a4, 8 -; RV64ZVE32F-NEXT: slli t2, t2, 8 +; RV64ZVE32F-NEXT: or a5, a6, a5 ; RV64ZVE32F-NEXT: or a6, t0, a7 ; RV64ZVE32F-NEXT: or a2, a4, a2 ; RV64ZVE32F-NEXT: lbu a4, 24(a0) ; RV64ZVE32F-NEXT: lbu a7, 25(a0) -; RV64ZVE32F-NEXT: or t0, t2, t1 -; RV64ZVE32F-NEXT: lbu t1, 28(a0) +; RV64ZVE32F-NEXT: lbu t0, 28(a0) ; RV64ZVE32F-NEXT: lbu a0, 29(a0) +; RV64ZVE32F-NEXT: slli t2, t2, 8 +; RV64ZVE32F-NEXT: or t1, t2, t1 ; RV64ZVE32F-NEXT: slli a7, a7, 8 ; RV64ZVE32F-NEXT: or a4, a7, a4 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: slli a0, a0, 8 -; RV64ZVE32F-NEXT: or a0, a0, t1 +; RV64ZVE32F-NEXT: or a0, a0, t0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, t0 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, t1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14540,7 +14541,6 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 24(a0) ; RV64ZVE32F-NEXT: lh a0, 26(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, 
ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14549,6 +14549,7 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14585,7 +14586,6 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 28(a0) ; RV64ZVE32F-NEXT: lh a0, 30(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14594,6 +14594,7 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -14624,21 +14625,21 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a1, 24(a0) ; RV64ZVE32F-NEXT: lh a2, 26(a0) ; RV64ZVE32F-NEXT: lh a3, 28(a0) -; RV64ZVE32F-NEXT: lh a4, 30(a0) -; RV64ZVE32F-NEXT: lh a5, 16(a0) -; RV64ZVE32F-NEXT: lh a6, 18(a0) -; RV64ZVE32F-NEXT: lh a7, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 16(a0) +; RV64ZVE32F-NEXT: lh a5, 18(a0) +; RV64ZVE32F-NEXT: lh a6, 20(a0) +; RV64ZVE32F-NEXT: lh a7, 30(a0) ; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a3 -; RV64ZVE32F-NEXT: vmv.v.x v9, a7 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -14669,21 +14670,21 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a1, 20(a0) ; RV64ZVE32F-NEXT: lh a2, 22(a0) ; RV64ZVE32F-NEXT: lh a3, 28(a0) -; RV64ZVE32F-NEXT: lh a4, 30(a0) -; RV64ZVE32F-NEXT: lh a5, 4(a0) -; RV64ZVE32F-NEXT: lh a6, 6(a0) -; RV64ZVE32F-NEXT: lh a7, 12(a0) +; RV64ZVE32F-NEXT: lh a4, 4(a0) +; RV64ZVE32F-NEXT: lh a5, 6(a0) +; RV64ZVE32F-NEXT: lh a6, 12(a0) +; RV64ZVE32F-NEXT: lh a7, 30(a0) ; RV64ZVE32F-NEXT: lh a0, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a3 -; RV64ZVE32F-NEXT: vmv.v.x v9, a7 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 -; 
RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -14719,7 +14720,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vmv.v.x v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 @@ -14728,6 +14728,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14766,7 +14767,6 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vmv.v.x v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 @@ -14775,6 +14775,7 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14813,7 +14814,6 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; RV64ZVE32F-NEXT: lh a6, 18(a0) ; RV64ZVE32F-NEXT: lh a0, 20(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14822,6 +14822,7 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a0 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a3 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14864,7 +14865,6 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 20(a0) ; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14873,6 +14873,7 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14912,7 +14913,6 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 20(a0) ; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14921,6 +14921,7 @@ 
define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14969,7 +14970,6 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14978,6 +14978,7 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -15017,7 +15018,6 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 12(a0) ; RV64ZVE32F-NEXT: lh a0, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 @@ -15026,6 +15026,7 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index dbbec96445e3e..f27c8e5d664e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -242,9 +242,9 @@ define <32 x double> @masked_load_v32f64(ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -280,9 +280,9 @@ define <64 x float> @masked_load_v64f32(ptr %a, <64 x i1> %mask) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -296,9 +296,9 @@ define <128 x bfloat> @masked_load_v128bf16(ptr %a, <128 x i1> %mask) { ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -312,9 +312,9 @@ define <128 x half> @masked_load_v128f16(ptr %a, <128 x 
i1> %mask) { ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index bca3544d8f032..6e613917f8cd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -240,9 +240,9 @@ define <32 x i64> @masked_load_v32i64(ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -278,9 +278,9 @@ define <64 x i32> @masked_load_v64i32(ptr %a, <64 x i1> %mask) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -305,9 +305,9 @@ define <128 x i16> @masked_load_v128i16(ptr %a, <128 x i1> %mask) { ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 8f2672e8f40c1..954a06e265659 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -12443,11 +12443,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v16, v8, 16 ; RV64V-NEXT: vslidedown.vi v18, v10, 16 -; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64V-NEXT: vsext.vf8 v8, v18 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64V-NEXT: vslidedown.vi v0, v0, 2 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; RV64V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index f7e311d06c03a..6421d7c8022f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -242,9 +242,9 @@ define void @masked_store_v32f64(<32 x double> %val, ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; 
CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -280,9 +280,9 @@ define void @masked_store_v64f32(<64 x float> %val, ptr %a, <64 x i1> %mask) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -296,9 +296,9 @@ define void @masked_store_v128bf16(<128 x bfloat> %val, ptr %a, <128 x i1> %mask ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -312,9 +312,9 @@ define void @masked_store_v128f16(<128 x half> %val, ptr %a, <128 x i1> %mask) { ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index 6914a86726af4..a6b29d675ce57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -240,9 +240,9 @@ define void @masked_store_v32i64(<32 x i64> %val, ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -278,9 +278,9 @@ define void @masked_store_v64i32(<64 x i32> %val, ptr %a, <64 x i1> %mask) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -305,9 +305,9 @@ define void @masked_store_v128i16(<128 x i16> %val, ptr %a, <128 x i1> %mask) { ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -321,10 +321,10 @@ define void @masked_store_v256i8(<256 x i8> %val, ptr %a, <256 x i1> %mask) { ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: 
vse8.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vse8.v v16, (a1), v0.t +; CHECK-NEXT: vse8.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v256i8.p0(<256 x i8> %val, ptr %a, i32 8, <256 x i1> %mask) ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index b6c441290ee45..cbaf62823f612 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -135,11 +135,11 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI6_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -264,9 +264,9 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: vp_nearbyint_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t @@ -308,9 +308,9 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-LABEL: vp_nearbyint_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t @@ -393,11 +393,11 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -437,11 +437,11 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -481,11 +481,11 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 
x double>, <15 x i1>, i32) define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -525,11 +525,11 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -569,47 +569,47 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a1 +; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: frflags a1 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, 
v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v @@ -626,31 +626,28 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroex ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: frflags a2 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: frflags a1 -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsflags a1 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: fsflags a1 +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 2cebf14ec5aa4..a7f38171e5522 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1928,14 +1928,13 @@ define float @vreduce_fminimum_v64f32(ptr %x) { ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB119_2 @@ -1983,57 +1982,54 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v24, (a2) +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, 
a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v24, v16, v24 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB121_2 @@ -2231,14 +2227,13 @@ define double @vreduce_fminimum_v32f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; 
CHECK-NEXT: beqz a0, .LBB131_2 @@ -2284,57 +2279,54 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v24, v16, v24 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB133_2 @@ -2610,14 +2602,13 @@ define float @vreduce_fmaximum_v64f32(ptr %x) 
{ ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB147_2 @@ -2665,57 +2656,54 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: addi a2, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v24, (a2) +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v24, v16, v24 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
vfmax.vv v8, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB149_2 @@ -2913,14 +2901,13 @@ define double @vreduce_fmaximum_v32f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB159_2 @@ -2966,57 +2953,54 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v24, v16, v24 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, 
(a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v8, v16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB161_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll index 276f6b077931b..7d13f3ff32b4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -231,19 +231,16 @@ declare i1 @llvm.vp.reduce.and.v256i1(i1, <256 x i1>, <256 x i1>, i32) define zeroext i1 @vpreduce_and_v256i1(i1 zeroext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v256i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v11, v9 -; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB14_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB14_2: -; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmnot.m v9, v9 -; CHECK-NEXT: vcpop.m a2, v9, v0.t +; CHECK-NEXT: vmnot.m v11, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vcpop.m a2, v11, v0.t ; CHECK-NEXT: seqz a2, a2 ; CHECK-NEXT: and a0, a2, a0 ; CHECK-NEXT: addi a2, a1, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 35cd789acfcc8..50df3dace4364 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -123,11 +123,11 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f16: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI6_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI6_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 @@ -240,10 +240,10 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-LABEL: vp_rint_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t 
; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 @@ -280,10 +280,10 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-LABEL: vp_rint_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 @@ -357,11 +357,11 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 @@ -397,11 +397,11 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 @@ -437,11 +437,11 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -477,11 +477,11 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -517,39 +517,39 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64: 
; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: addi a2, a0, -16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -570,23 +570,20 @@ define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %ev ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index d8ff7062f033e..f595fe8e23c59 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -17,9 +17,9 @@ define <2 x half> 
@vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -63,8 +63,8 @@ define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -103,9 +103,9 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -149,8 +149,8 @@ define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; 
ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -189,9 +189,9 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -235,8 +235,8 @@ define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -271,14 +271,14 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_round_v16f16(<16 x half> 
%va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -323,8 +323,8 @@ define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_round_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_round_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_round_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_round_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -444,13 +444,13 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-LABEL: vp_round_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: 
vmv1r.v v10, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_round_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -488,13 +488,13 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-LABEL: vp_round_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -535,9 +535,9 @@ define <2 x double> @vp_round_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext % ; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -556,8 +556,8 @@ define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -573,14 +573,14 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -600,8 +600,8 @@ define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, 
%lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -617,14 +617,14 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -644,8 +644,8 @@ define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -661,14 +661,14 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -688,8 +688,8 @@ define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %e ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -705,14 +705,14 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: 
vfcvt.x.f.v v24, v8, v0.t @@ -732,8 +732,8 @@ define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %e ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -749,44 +749,44 @@ declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: fsrmi a2, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -806,28 +806,25 @@ define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 4 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: 
vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 2649f234375d2..9d9762c8fee25 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -17,9 +17,9 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -63,8 +63,8 @@ define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -103,9 +103,9 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -149,8 +149,8 @@ define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -189,9 +189,9 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -235,8 +235,8 @@ define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -271,14 +271,14 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -323,8 +323,8 @@ define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_roundeven_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_roundeven_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, 
v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_roundeven_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_roundeven_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -444,13 +444,13 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: vp_roundeven_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_roundeven_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -488,13 +488,13 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-LABEL: vp_roundeven_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -535,9 +535,9 @@ define <2 x double> @vp_roundeven_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroe ; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -556,8 +556,8 @@ define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext % ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, 
ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -573,14 +573,14 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -600,8 +600,8 @@ define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext % ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -617,14 +617,14 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -644,8 +644,8 @@ define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext % ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -661,14 +661,14 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -688,8 +688,8 @@ 
define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroex ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -705,14 +705,14 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -732,8 +732,8 @@ define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroex ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -749,44 +749,44 @@ declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: 
vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -806,28 +806,25 @@ define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroex ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index 50e65b62e7848..cc9fb45aedb6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -17,9 +17,9 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext ; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -63,8 +63,8 @@ define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %ev ; ZVFH-NEXT: flh fa5, 
%lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -103,9 +103,9 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext ; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -149,8 +149,8 @@ define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %ev ; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -189,9 +189,9 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -235,8 +235,8 @@ define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %ev ; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -271,14 +271,14 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -323,8 +323,8 @@ define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, 
v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_roundtozero_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroe ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext % ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_roundtozero_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext % ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -444,13 +444,13 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vp_roundtozero_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext % ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -488,13 +488,13 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z ; CHECK-LABEL: vp_roundtozero_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf 
v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroex ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -535,9 +535,9 @@ define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zer ; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -556,8 +556,8 @@ define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -573,14 +573,14 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -600,8 +600,8 @@ define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -617,14 +617,14 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -644,8 +644,8 @@ define 
<8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext ; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -661,14 +661,14 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI22_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -688,8 +688,8 @@ define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zero ; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -705,14 +705,14 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI24_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI24_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -732,8 +732,8 @@ define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zero ; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -749,44 +749,44 @@ declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, 
ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: lui a1, %hi(.LCPI26_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1) -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: lui a2, %hi(.LCPI26_0) +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) +; CHECK-NEXT: addi a2, a0, -16 +; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: and a0, a0, a2 +; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vmv1r.v v5, v7 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -806,28 +806,25 @@ define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zero ; CHECK-NEXT: .LBB27_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: lui a2, %hi(.LCPI27_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) -; CHECK-NEXT: addi a2, a0, -16 -; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI27_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1) +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a2 -; CHECK-NEXT: fsrmi a2, 1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: vmflt.vf v7, v24, fa5 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v24, v16 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 318f38839851c..45874ec456354 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll
@@ -157,10 +157,10 @@ define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32>
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle32.v v24, (a0)
 ; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v0, 4
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT: vadd.vv v8, v16, v8
 ; CHECK-NEXT: vrsub.vi v24, v24, 0, v0.t
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index c05f306424519..73d1b2c257f6b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -31,8 +31,8 @@ define <8 x i1> @v8i1_v16i1(<16 x i1>) {
 ; RV32-NEXT: srli a0, a0, 31
 ; RV32-NEXT: vslide1down.vx v8, v8, a1
 ; RV32-NEXT: vslide1down.vx v9, v9, a2
-; RV32-NEXT: vmv.v.i v0, 15
 ; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vmv.v.i v0, 15
 ; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; RV32-NEXT: vand.vi v8, v8, 1
 ; RV32-NEXT: vmsne.vi v0, v8, 0
@@ -65,8 +65,8 @@ define <8 x i1> @v8i1_v16i1(<16 x i1>) {
 ; RV64-NEXT: srli a0, a0, 63
 ; RV64-NEXT: vslide1down.vx v8, v8, a1
 ; RV64-NEXT: vslide1down.vx v9, v9, a2
-; RV64-NEXT: vmv.v.i v0, 15
 ; RV64-NEXT: vslide1down.vx v9, v9, a0
+; RV64-NEXT: vmv.v.i v0, 15
 ; RV64-NEXT: vslidedown.vi v8, v9, 4, v0.t
 ; RV64-NEXT: vand.vi v8, v8, 1
 ; RV64-NEXT: vmsne.vi v0, v8, 0
@@ -82,10 +82,10 @@ define <4 x i32> @v4i32_v8i32(<8 x i32>) {
 ; CHECK-NEXT: vmv.v.i v0, 8
 ; CHECK-NEXT: vslidedown.vi v10, v8, 2
 ; CHECK-NEXT: vslideup.vi v10, v8, 1, v0.t
-; CHECK-NEXT: vmv.v.i v0, 5
 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v8, v8, 4
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vmv.v.i v0, 5
 ; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
@@ -104,8 +104,8 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT: vslidedown.vi v12, v12, 3, v0.t
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 10
 ; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.i v0, 10
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
 ; CHECK-NEXT: ret
@@ -246,11 +246,11 @@ define <16 x i32> @v16i32_v4i32(<4 x i32>) {
 ; CHECK-NEXT: vmv.s.x v0, a1
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: addi a0, a0, -1856
+; CHECK-NEXT: srli a1, a1, 2
 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: srli a1, a1, 2
 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index 5e6d7c1eedb76..f9a7d9588e3d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -145,15 +145,13 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v8, 5, v0.t
-; CHECK-NEXT: vmv.v.i v0, 4
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v9, v8, 4, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 4, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i8>, ptr %in, align 1
@@ -191,15 +189,13 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v8, 6, v0.t
-; CHECK-NEXT: vmv.v.i v0, 4
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v9, v8, 6, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 6, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i8>, ptr %in, align 1
@@ -279,18 +275,17 @@ define void @deinterleave7_0_i64(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv4r.v v16, v8
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT: vslidedown.vi v16, v8, 6, v0.t
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT: vrgather.vi v16, v8, 6, v0.t
-; CHECK-NEXT: vse64.v v16, (a1)
+; CHECK-NEXT: vrgather.vi v8, v16, 6, v0.t
+; CHECK-NEXT: vse64.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i64>, ptr %in
@@ -324,19 +319,18 @@ define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT: vslidedown.vi v12, v8, 6, v0.t
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT: vrgather.vi v12, v8, 6, v0.t
+; CHECK-NEXT: vrgather.vi v8, v12, 6, v0.t
 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT: vse32.v v12, (a1)
+; CHECK-NEXT: vse32.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
 %0 = load <16 x i32>, ptr %in
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index 9279e0a4d3a6c..813a8aa40fb64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -179,9 +179,9 @@ define void @vnsrl_32_i32(ptr %in, ptr %out) {
 ; ZVE32F: # %bb.0: # %entry
 ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; ZVE32F-NEXT: vle32.v v8, (a0)
-; ZVE32F-NEXT: vmv.v.i v0, 1
 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; ZVE32F-NEXT: vmv.v.i v0, 1
 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
 ; ZVE32F-NEXT: vse32.v v9, (a1)
 ; ZVE32F-NEXT: ret
@@ -233,9 +233,9 @@ define void @vnsrl_32_float(ptr %in, ptr %out) {
 ; ZVE32F: # %bb.0: # %entry
 ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; ZVE32F-NEXT: vle32.v v8, (a0)
-; ZVE32F-NEXT: vmv.v.i v0, 1
 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; ZVE32F-NEXT: vmv.v.i v0, 1
 ; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
 ; ZVE32F-NEXT: vse32.v v9, (a1)
 ; ZVE32F-NEXT: ret
@@ -276,9 +276,9 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
 ; V: # %bb.0: # %entry
 ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
 ; V-NEXT: vle64.v v8, (a0)
-; V-NEXT: vmv.v.i v0, 1
 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
 ; V-NEXT: vslidedown.vi v9, v8, 2
+; V-NEXT: vmv.v.i v0, 1
 ; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
 ; V-NEXT: vse64.v v9, (a1)
 ; V-NEXT: ret
@@ -327,9 +327,9 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
 ; V: # %bb.0: # %entry
 ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma
 ; V-NEXT: vle64.v v8, (a0)
-; V-NEXT: vmv.v.i v0, 1
 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
 ; V-NEXT: vslidedown.vi v9, v8, 2
+; V-NEXT: vmv.v.i v0, 1
 ; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
 ; V-NEXT: vse64.v v9, (a1)
 ; V-NEXT: ret
@@ -402,21 +402,21 @@ define void @vnsrl_0_i8_undef_negative(ptr %in, ptr %out) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI17_0)
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: li a0, 48
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vi v10, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT: vslideup.vi v11, v10, 4
-; CHECK-NEXT: vslideup.vi v11, v10, 3, v0.t
-; CHECK-NEXT: li a0, 48
+; CHECK-NEXT: vrgather.vv v11, v8, v9
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vslideup.vi v8, v10, 3, v0.t
 ; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmerge.vvm v8, v10, v11, v0
+; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0
 ; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index 9629b3547b3d0..0e806e5871a53 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -185,12 +185,11 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v10, (a0)
 ; CHECK-NEXT: vmv.v.i v0, 1
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t
 ; CHECK-NEXT: vmv.v.i v0, 5
-; CHECK-NEXT: vsetivli zero, 8,
e32, m2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vvm v9, v9, v10, v0 ; CHECK-NEXT: addi a0, a1, 672 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -209,13 +208,14 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) { ; CHECK-LABEL: shuffle2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 1 -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vslideup.vi v12, v8, 2 +; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: vslidedown.vi v13, v8, 1 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v12, v0 +; CHECK-NEXT: vslideup.vi v13, v12, 2 +; CHECK-NEXT: vmv.v.i v0, 6 +; CHECK-NEXT: vmerge.vvm v9, v9, v13, v0 ; CHECK-NEXT: ret %b = extractelement <4 x float> %a, i32 2 %c = insertelement <16 x float> , float %b, i32 5 @@ -253,11 +253,10 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca ; RV64-NEXT: addi s0, sp, 256 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.i v0, 1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmv.v.i v0, 1 ; RV64-NEXT: vslidedown.vi v18, v15, 1, v0.t ; RV64-NEXT: mv s2, sp ; RV64-NEXT: vs8r.v v16, (s2) @@ -344,17 +343,19 @@ define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) { ; RV32-NEXT: vwsubu.vx v10, v12, a0 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vmv.v.x v12, a0 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v10, v10, a1 ; RV32-NEXT: vand.vx v12, v12, a1 ; RV32-NEXT: vsrl.vv v10, v8, v10 ; RV32-NEXT: vsll.vv v8, v8, v12 -; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetivli zero, 16, e64, m2, ta, ma ; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: vrgather.vi v10, v8, 2 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -369,12 +370,12 @@ define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) { ; RV64-NEXT: vsetivli zero, 16, e64, m2, ta, ma ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vsll.vx v8, v8, a0 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addi a0, a0, -241 +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vrgather.vi v10, v8, 2 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -433,11 +434,9 @@ define void @shuffle_3_input_vectors() vscale_range(4,4) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 1 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: vslidedown.vi v20, v8, 1, v0.t ; CHECK-NEXT: vslideup.vi v20, v9, 3 ; CHECK-NEXT: vslidedown.vi v21, v9, 1 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll index 9102cd6ed7036..28f84a5de6470 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp-interleave.ll @@ -53,11 +53,11 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; V512: # %bb.0: ; V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; V512-NEXT: vslideup.vi v10, v9, 1 -; V512-NEXT: vmv1r.v v11, v8 ; V512-NEXT: vslideup.vi v10, v9, 2 +; V512-NEXT: vmv1r.v v9, v8 +; V512-NEXT: vslideup.vi v9, v8, 1 ; V512-NEXT: vmv.v.i v0, 10 -; V512-NEXT: vslideup.vi v11, v8, 1 -; V512-NEXT: vmerge.vvm v8, v11, v10, v0 +; V512-NEXT: vmerge.vvm v8, v9, v10, v0 ; V512-NEXT: ret %a = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> ret <4 x double> %a @@ -251,8 +251,8 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vzext.vf2 v8, v24 ; V128-NEXT: addi a1, a1, -1366 ; V128-NEXT: vzext.vf2 v24, v0 -; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsll.vx v8, v8, a0 +; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v24, v8, v0 ; V128-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll index 4812b27f442c0..4bef5cdc5d196 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll @@ -95,10 +95,10 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: vslideup.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vslideup.vi v12, v8, 2 ; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -109,17 +109,17 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v10, v8, 2, v0.t +; CHECK-NEXT: vfmv.v.f v8, fa5 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa5 ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32> @@ -129,16 +129,16 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_vx_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t +; CHECK-NEXT: vfmv.v.f v10, fa5 ; 
CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v10, fa5 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> @@ -316,8 +316,8 @@ define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v8, 1 -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vslideup.vi v10, v8, 2 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -354,8 +354,8 @@ define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v8, 1 -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vslideup.vi v10, v8, 2 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -417,10 +417,10 @@ define <16 x float> @shuffle_disjoint_lanes_one_broadcast(<16 x float> %v, <16 x ; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: vrgather.vi v16, v8, 7 ; CHECK-NEXT: lui a0, 15 ; CHECK-NEXT: addi a0, a0, 240 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v16, v8, 7 ; CHECK-NEXT: vrgatherei16.vv v16, v12, v20, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret @@ -431,14 +431,14 @@ define <16 x float> @shuffle_disjoint_lanes_one_broadcast(<16 x float> %v, <16 x define <16 x float> @shuffle_disjoint_lanes_one_splat(float %v, <16 x float> %w) { ; CHECK-LABEL: shuffle_disjoint_lanes_one_splat: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: lui a0, %hi(.LCPI33_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0) -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: lui a0, 15 ; CHECK-NEXT: addi a0, a0, 240 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll index 93c00d5c03717..31709c8fc596f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll @@ -66,11 +66,11 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V512: # %bb.0: ; V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; V512-NEXT: vslideup.vi v10, v9, 1 -; V512-NEXT: vmv1r.v v11, v8 ; V512-NEXT: vslideup.vi v10, v9, 2 +; V512-NEXT: vmv1r.v v9, v8 +; V512-NEXT: vslideup.vi v9, v8, 1 ; V512-NEXT: vmv.v.i v0, 10 -; V512-NEXT: vslideup.vi v11, v8, 1 -; V512-NEXT: vmerge.vvm v8, v11, v10, v0 +; V512-NEXT: vmerge.vvm v8, v9, v10, v0 ; V512-NEXT: ret %a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> ret <4 x i64> %a @@ -415,8 +415,8 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vzext.vf2 v8, v24 ; V128-NEXT: addi a1, a1, -1366 ; V128-NEXT: vzext.vf2 v24, v0 -; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsll.vx v8, v8, a0 +; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v24, v8, v0 ; V128-NEXT: addi a0, sp, 16 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll index 63fd1d1ed2d25..7571e4c357991 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll @@ -76,8 +76,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vslidedown.vi v10, v8, 1 -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vslideup.vi v10, v8, 2 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -145,20 +145,18 @@ define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) { define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32-LABEL: vrgather_shuffle_vv_v8i64: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vmv.v.i v16, 2 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv.v.i v21, 2 +; RV32-NEXT: li a0, 5 +; RV32-NEXT: vslide1down.vx v21, v16, a0 ; RV32-NEXT: li a0, 164 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: li a0, 5 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v21, a0 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v12, v8, v0.t +; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -201,8 +199,8 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) ; RV32-NEXT: vle16.v v21, (a0) ; RV32-NEXT: li a0, 113 -; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vrgatherei16.vv v12, v16, v20 +; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vrgatherei16.vv v12, v8, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret @@ -355,10 +353,10 @@ define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) { define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 2 +; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -375,9 +373,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -390,10 +388,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: li a0, 67 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -410,9 +408,9 @@ define <8 x i8> 
@splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -424,16 +422,16 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i2we4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vmv.v.i v11, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vslideup.vi v12, v11, 2 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -660,10 +658,10 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -22 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, -22 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -1047,10 +1045,10 @@ define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI72_0) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: vrgather.vi v16, v8, 7 ; CHECK-NEXT: lui a0, 15 ; CHECK-NEXT: addi a0, a0, 240 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v16, v8, 7 ; CHECK-NEXT: vrgatherei16.vv v16, v12, v20, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret @@ -1061,14 +1059,14 @@ define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32 define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) { ; CHECK-LABEL: shuffle_disjoint_lanes_one_splat: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI73_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vle16.v v16, (a1) -; CHECK-NEXT: lui a1, 15 -; CHECK-NEXT: addi a1, a1, 240 -; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI73_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: lui a0, 15 +; CHECK-NEXT: addi a0, a0, 240 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -1295,10 +1293,11 @@ define void @shuffle_i256_splat(ptr %p) nounwind { define <16 x i32> @shuffle_m1_prefix(<16 x i32> %a) { ; CHECK-LABEL: shuffle_m1_prefix: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: 
vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vslideup.vi v12, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll index fe2072990e2ac..f6c6c4f52a70a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll @@ -510,10 +510,10 @@ define <8 x i16> @shuffle_v8i16_as_i64_16(<8 x i16> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i64_16: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, 136 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1 +; ZVKB-ZVE32X-NEXT: li a0, 136 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 3, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret @@ -555,10 +555,10 @@ define <8 x i16> @shuffle_v8i16_as_i64_32(<8 x i16> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i64_32: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, 204 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 2 +; ZVKB-ZVE32X-NEXT: li a0, 204 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 2, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret @@ -600,10 +600,10 @@ define <8 x i16> @shuffle_v8i16_as_i64_48(<8 x i16> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i64_48: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, -18 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 3 +; ZVKB-ZVE32X-NEXT: li a0, -18 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret @@ -645,10 +645,10 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8i32_as_i64: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, 170 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e32, m4, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-ZVE32X-NEXT: li a0, 170 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v12, v8, 1, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12 ; ZVKB-ZVE32X-NEXT: ret @@ -714,10 +714,10 @@ define <8 x half> @shuffle_v8f16_as_i64_16(<8 x half> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i64_16: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, 136 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1 +; ZVKB-ZVE32X-NEXT: li a0, 136 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 3, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret @@ -759,10 +759,10 @@ define <8 x half> @shuffle_v8f16_as_i64_32(<8 x half> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i64_32: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, 204 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 2 +; ZVKB-ZVE32X-NEXT: li a0, 204 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 2, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret @@ -804,10 +804,10 @@ define <8 x half> @shuffle_v8f16_as_i64_48(<8 x half> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i64_48: ; ZVKB-ZVE32X: # %bb.0: 
-; ZVKB-ZVE32X-NEXT: li a0, -18 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 3 +; ZVKB-ZVE32X-NEXT: li a0, -18 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10 ; ZVKB-ZVE32X-NEXT: ret @@ -849,10 +849,10 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) { ; ; ZVKB-ZVE32X-LABEL: shuffle_v8f32_as_i64: ; ZVKB-ZVE32X: # %bb.0: -; ZVKB-ZVE32X-NEXT: li a0, 170 ; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e32, m4, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-ZVE32X-NEXT: li a0, 170 +; ZVKB-ZVE32X-NEXT: vmv.s.x v0, a0 ; ZVKB-ZVE32X-NEXT: vslideup.vi v12, v8, 1, v0.t ; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12 ; ZVKB-ZVE32X-NEXT: ret @@ -895,8 +895,8 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2) ; ZVKB-ZVE32X-LABEL: shuffle_v8f32_as_i64_exact: ; ZVKB-ZVE32X: # %bb.0: ; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; ZVKB-ZVE32X-NEXT: vmv.v.i v0, 10 ; ZVKB-ZVE32X-NEXT: vslidedown.vi v11, v9, 1 +; ZVKB-ZVE32X-NEXT: vmv.v.i v0, 10 ; ZVKB-ZVE32X-NEXT: vslideup.vi v11, v9, 1, v0.t ; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1 ; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1, v0.t @@ -909,20 +909,20 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2) define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) { ; CHECK-LABEL: shuffle_v8i64_as_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v12, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret ; ; ZVKB-V-LABEL: shuffle_v8i64_as_i128: ; ZVKB-V: # %bb.0: -; ZVKB-V-NEXT: li a0, 170 ; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; ZVKB-V-NEXT: vmv.s.x v0, a0 ; ZVKB-V-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-V-NEXT: li a0, 170 +; ZVKB-V-NEXT: vmv.s.x v0, a0 ; ZVKB-V-NEXT: vslideup.vi v12, v8, 1, v0.t ; ZVKB-V-NEXT: vmv.v.v v8, v12 ; ZVKB-V-NEXT: ret @@ -934,20 +934,20 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) { define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) { ; CHECK-LABEL: shuffle_v8i64_as_i128_2: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 168 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v12, v8, 1 +; CHECK-NEXT: li a0, 168 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v12, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret ; ; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2: ; ZVKB-V: # %bb.0: -; ZVKB-V-NEXT: li a0, 168 ; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; ZVKB-V-NEXT: vmv.s.x v0, a0 ; ZVKB-V-NEXT: vslidedown.vi v12, v8, 1 +; ZVKB-V-NEXT: li a0, 168 +; ZVKB-V-NEXT: vmv.s.x v0, a0 ; ZVKB-V-NEXT: vslideup.vi v12, v8, 1, v0.t ; ZVKB-V-NEXT: vmv.v.v v8, v12 ; ZVKB-V-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 29d9a8a9b060c..81d8a11bf26fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -64,8 +64,8 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, 
a3, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t ; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t ; CHECK-NEXT: vadd.vv v9, v10, v9 ; CHECK-NEXT: vse8.v v9, (a0) ; CHECK-NEXT: addi a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 4b7f82f94f5e4..7b061b01252ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -632,18 +632,19 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: .LBB47_6: ; CHECK-RV32-NEXT: mul a5, a4, a2 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV32-NEXT: add a5, a1, a5 -; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV32-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a1), a2, v0.t +; CHECK-RV32-NEXT: add a1, a1, a5 +; CHECK-RV32-NEXT: vmv1r.v v0, v9 +; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV32-NEXT: addi a1, a0, 128 ; CHECK-RV32-NEXT: addi a2, a0, 256 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v8, (a0) -; CHECK-RV32-NEXT: vse64.v v24, (a1) +; CHECK-RV32-NEXT: vse64.v v24, (a0) +; CHECK-RV32-NEXT: vse64.v v8, (a1) ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vse64.v v16, (a2) ; CHECK-RV32-NEXT: ret @@ -683,18 +684,19 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: .LBB47_6: ; CHECK-RV64-NEXT: mul a5, a3, a2 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-RV64-NEXT: add a5, a1, a5 -; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV64-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v24, (a1), a2, v0.t +; CHECK-RV64-NEXT: add a1, a1, a5 +; CHECK-RV64-NEXT: vmv1r.v v0, v9 +; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t ; CHECK-RV64-NEXT: addi a1, a0, 128 ; CHECK-RV64-NEXT: addi a2, a0, 256 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v8, (a0) -; CHECK-RV64-NEXT: vse64.v v24, (a1) +; CHECK-RV64-NEXT: vse64.v v24, (a0) +; CHECK-RV64-NEXT: vse64.v v8, (a1) ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vse64.v v16, (a2) ; CHECK-RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll index 7ca329835b7ac..733c850d64011 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll @@ -472,9 +472,9 @@ define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %strid ; CHECK-NEXT: addi a3, a2, -16 ; CHECK-NEXT: sltu a2, a2, a3 ; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; 
CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index d2b8790e958c0..82bde8a045594 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -229,19 +229,20 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a3, 80 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 80 * vlenb ; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v7, v0 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 @@ -272,14 +273,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a6, 64 ; RV32-NEXT: .LBB16_4: -; RV32-NEXT: addi t2, a1, 128 +; RV32-NEXT: addi t3, a1, 128 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v6, v4, 2 -; RV32-NEXT: addi t6, a1, 512 -; RV32-NEXT: addi t5, a1, 640 +; RV32-NEXT: addi s0, a1, 512 +; RV32-NEXT: addi t6, a1, 640 ; RV32-NEXT: vslidedown.vi v0, v3, 2 ; RV32-NEXT: addi t1, t1, -1 -; RV32-NEXT: addi t3, a1, 384 +; RV32-NEXT: addi t2, a1, 384 ; RV32-NEXT: vslidedown.vi v2, v5, 2 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: addi t4, a6, -32 @@ -287,39 +288,41 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: addi a6, a6, -1 ; RV32-NEXT: and a6, a6, t4 ; RV32-NEXT: addi t4, a6, -16 -; RV32-NEXT: sltu s0, a6, t4 -; RV32-NEXT: addi s0, s0, -1 +; RV32-NEXT: sltu t5, a6, t4 +; RV32-NEXT: addi t5, t5, -1 ; RV32-NEXT: bltu a6, a2, .LBB16_6 ; RV32-NEXT: # %bb.5: ; RV32-NEXT: li a6, 16 ; RV32-NEXT: .LBB16_6: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (s0) +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: slli s0, s0, 6 +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: addi s0, s0, 16 +; RV32-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill ; RV32-NEXT: vle64.v v8, (t6) -; RV32-NEXT: csrr t6, vlenb -; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: li a0, 56 -; RV32-NEXT: mul t6, t6, a0 -; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add t6, sp, t6 -; RV32-NEXT: addi t6, t6, 16 -; RV32-NEXT: vs8r.v v8, (t6) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v8, (t5) -; RV32-NEXT: vle64.v v16, (t2) +; RV32-NEXT: vle64.v v16, (t3) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: slli t3, t3, 4 +; RV32-NEXT: add t3, sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v16, (t3) # Unknown-size Folded Spill ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: li t6, 56 +; RV32-NEXT: mul t3, t3, t6 +; RV32-NEXT: add t3, sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v24, (t3) # Unknown-size 
Folded Spill +; RV32-NEXT: vle64.v v24, (t2) ; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: li t5, 48 -; RV32-NEXT: mul t2, t2, t5 -; RV32-NEXT: add t2, sp, t2 -; RV32-NEXT: addi t2, t2, 16 -; RV32-NEXT: vs8r.v v24, (t2) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v24, (t3) -; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: slli t2, t2, 5 ; RV32-NEXT: add t2, sp, t2 ; RV32-NEXT: addi t2, t2, 16 ; RV32-NEXT: vs8r.v v24, (t2) # Unknown-size Folded Spill ; RV32-NEXT: and t2, t1, t0 -; RV32-NEXT: and t1, s0, t4 +; RV32-NEXT: and t1, t5, t4 ; RV32-NEXT: addi a1, a1, 256 ; RV32-NEXT: mv t0, a4 ; RV32-NEXT: bltu a4, a3, .LBB16_8 @@ -327,32 +330,34 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: li t0, 32 ; RV32-NEXT: .LBB16_8: ; RV32-NEXT: vsetvli zero, t2, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v24, v8, 0, v0.t -; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: li t3, 24 -; RV32-NEXT: mul t2, t2, t3 -; RV32-NEXT: add t2, sp, t2 -; RV32-NEXT: addi t2, t2, 16 -; RV32-NEXT: vs8r.v v24, (t2) # Unknown-size Folded Spill +; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV32-NEXT: addi t2, sp, 16 +; RV32-NEXT: vs8r.v v16, (t2) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr t2, vlenb -; RV32-NEXT: li t3, 56 -; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: slli t2, t2, 6 ; RV32-NEXT: add t2, sp, t2 ; RV32-NEXT: addi t2, t2, 16 -; RV32-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (t2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v8, v24, 0, v0.t +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 6 +; RV32-NEXT: li t2, 72 +; RV32-NEXT: mul a5, a5, t2 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, t1, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: li t1, 24 +; RV32-NEXT: mul a5, a5, t1 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill @@ -362,13 +367,21 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: and a5, t0, a5 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a1) -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v30, v7, 2 +; RV32-NEXT: vslidedown.vi v8, v7, 2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li t0, 48 +; RV32-NEXT: li t0, 56 ; RV32-NEXT: mul a1, a1, t0 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -376,99 +389,90 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: vsetvli zero, a6, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: 
mul a1, a1, a6 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV32-NEXT: bltu a4, a2, .LBB16_10 ; RV32-NEXT: # %bb.9: ; RV32-NEXT: li a4, 16 ; RV32-NEXT: .LBB16_10: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v5 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; RV32-NEXT: vnsrl.wi v24, v8, 0, v0.t +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 48 +; RV32-NEXT: li a4, 56 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: mv a1, a7 ; RV32-NEXT: bltu a7, a3, .LBB16_12 ; RV32-NEXT: # %bb.11: ; RV32-NEXT: li a1, 32 ; RV32-NEXT: .LBB16_12: -; RV32-NEXT: vmv1r.v v0, v30 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vmv4r.v v24, v16 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vmv4r.v v8, v24 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 +; RV32-NEXT: li a5, 72 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 6 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vslideup.vi v16, v8, 16 +; RV32-NEXT: vslideup.vi v16, v24, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: li a5, 72 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: addi a4, a1, -16 ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: slli a5, a5, 6 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v16, v24, 16 +; RV32-NEXT: vslideup.vi v16, v0, 16 ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: slli a5, a5, 6 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; 
RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 48 +; RV32-NEXT: li a6, 56 ; RV32-NEXT: mul a5, a5, a6 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v16, v8, 16 ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 48 +; RV32-NEXT: li a6, 56 ; RV32-NEXT: mul a5, a5, a6 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 @@ -477,7 +481,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload @@ -487,9 +497,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: # %bb.13: ; RV32-NEXT: li a7, 16 ; RV32-NEXT: .LBB16_14: -; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -501,7 +515,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: vse32.v v24, (a0) ; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 @@ -509,21 +523,21 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 56 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 6 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a0, a0, 384 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 72 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 72 +; RV32-NEXT: li a1, 80 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 32 @@ -540,19 +554,20 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset s0, -8 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 72 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 
+ 80 * vlenb ; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: li a3, 40 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 32 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 32 @@ -583,14 +598,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a6, 64 ; RV64-NEXT: .LBB16_4: -; RV64-NEXT: addi t2, a1, 128 +; RV64-NEXT: addi t3, a1, 128 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v6, v4, 2 -; RV64-NEXT: addi t6, a1, 512 -; RV64-NEXT: addi t5, a1, 640 +; RV64-NEXT: addi s0, a1, 512 +; RV64-NEXT: addi t6, a1, 640 ; RV64-NEXT: vslidedown.vi v0, v3, 2 ; RV64-NEXT: addi t1, t1, -1 -; RV64-NEXT: addi t3, a1, 384 +; RV64-NEXT: addi t2, a1, 384 ; RV64-NEXT: vslidedown.vi v2, v5, 2 ; RV64-NEXT: li a3, 32 ; RV64-NEXT: addi t4, a6, -32 @@ -598,39 +613,41 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: addi a6, a6, -1 ; RV64-NEXT: and a6, a6, t4 ; RV64-NEXT: addi t4, a6, -16 -; RV64-NEXT: sltu s0, a6, t4 -; RV64-NEXT: addi s0, s0, -1 +; RV64-NEXT: sltu t5, a6, t4 +; RV64-NEXT: addi t5, t5, -1 ; RV64-NEXT: bltu a6, a2, .LBB16_6 ; RV64-NEXT: # %bb.5: ; RV64-NEXT: li a6, 16 ; RV64-NEXT: .LBB16_6: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (s0) +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: slli s0, s0, 6 +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 32 +; RV64-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill ; RV64-NEXT: vle64.v v8, (t6) -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a0, 56 -; RV64-NEXT: mul t6, t6, a0 -; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 32 -; RV64-NEXT: vs8r.v v8, (t6) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v8, (t5) -; RV64-NEXT: vle64.v v16, (t2) +; RV64-NEXT: vle64.v v16, (t3) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 4 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: vs8r.v v16, (t3) # Unknown-size Folded Spill ; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: li t6, 56 +; RV64-NEXT: mul t3, t3, t6 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: vs8r.v v24, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v24, (t2) ; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: li t5, 48 -; RV64-NEXT: mul t2, t2, t5 -; RV64-NEXT: add t2, sp, t2 -; RV64-NEXT: addi t2, t2, 32 -; RV64-NEXT: vs8r.v v24, (t2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v24, (t3) -; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: slli t2, t2, 3 +; RV64-NEXT: slli t2, t2, 5 ; RV64-NEXT: add t2, sp, t2 ; RV64-NEXT: addi t2, t2, 32 ; RV64-NEXT: vs8r.v v24, (t2) # Unknown-size Folded Spill ; RV64-NEXT: and t2, t1, t0 -; RV64-NEXT: and t1, s0, t4 +; RV64-NEXT: and t1, t5, t4 ; RV64-NEXT: addi a1, a1, 256 ; RV64-NEXT: mv t0, a4 ; RV64-NEXT: bltu a4, a3, .LBB16_8 @@ -638,32 +655,34 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: li t0, 32 ; RV64-NEXT: .LBB16_8: ; RV64-NEXT: vsetvli zero, t2, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v24, v8, 0, v0.t -; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: li t3, 
24 -; RV64-NEXT: mul t2, t2, t3 -; RV64-NEXT: add t2, sp, t2 -; RV64-NEXT: addi t2, t2, 32 -; RV64-NEXT: vs8r.v v24, (t2) # Unknown-size Folded Spill +; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV64-NEXT: addi t2, sp, 32 +; RV64-NEXT: vs8r.v v16, (t2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: li t3, 56 -; RV64-NEXT: mul t2, t2, t3 +; RV64-NEXT: slli t2, t2, 6 ; RV64-NEXT: add t2, sp, t2 ; RV64-NEXT: addi t2, t2, 32 -; RV64-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (t2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v24, 0, v0.t +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: li t2, 72 +; RV64-NEXT: mul a5, a5, t2 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 32 ; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, t1, e32, m4, ta, ma ; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: li t1, 24 +; RV64-NEXT: mul a5, a5, t1 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 32 ; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill @@ -673,13 +692,21 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: and a5, t0, a5 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: addi a1, sp, 32 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v30, v7, 2 +; RV64-NEXT: vslidedown.vi v8, v7, 2 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li t0, 48 +; RV64-NEXT: li t0, 56 ; RV64-NEXT: mul a1, a1, t0 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 @@ -687,99 +714,90 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: vsetvli zero, a6, e32, m4, ta, ma ; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a6, 56 -; RV64-NEXT: mul a1, a1, a6 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t ; RV64-NEXT: bltu a4, a2, .LBB16_10 ; RV64-NEXT: # %bb.9: ; RV64-NEXT: li a4, 16 ; RV64-NEXT: .LBB16_10: +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v5 -; RV64-NEXT: addi a1, sp, 32 -; RV64-NEXT: vl8r.v v8, 
(a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; RV64-NEXT: vnsrl.wi v24, v8, 0, v0.t +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a4, 48 +; RV64-NEXT: li a4, 56 ; RV64-NEXT: mul a1, a1, a4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: mv a1, a7 ; RV64-NEXT: bltu a7, a3, .LBB16_12 ; RV64-NEXT: # %bb.11: ; RV64-NEXT: li a1, 32 ; RV64-NEXT: .LBB16_12: -; RV64-NEXT: vmv1r.v v0, v30 +; RV64-NEXT: addi a4, sp, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v24, v16 ; RV64-NEXT: csrr a4, vlenb ; RV64-NEXT: li a5, 24 ; RV64-NEXT: mul a4, a4, a5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v8, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 4 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a5, 24 +; RV64-NEXT: li a5, 72 ; RV64-NEXT: mul a4, a4, a5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 6 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 ; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vslideup.vi v16, v8, 16 +; RV64-NEXT: vslideup.vi v16, v24, 16 ; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: li a5, 72 +; RV64-NEXT: mul a4, a4, a5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 32 ; RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV64-NEXT: addi a4, a1, -16 ; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 56 -; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: slli a5, a5, 6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 32 ; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v16, v24, 16 +; RV64-NEXT: vslideup.vi v16, v0, 16 ; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 56 -; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: slli a5, a5, 6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 32 ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 48 +; RV64-NEXT: li a6, 56 ; RV64-NEXT: mul a5, a5, a6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 32 ; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 24 -; RV64-NEXT: mul a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vslideup.vi v16, v8, 16 ; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: li a6, 48 +; RV64-NEXT: li a6, 56 ; RV64-NEXT: mul a5, a5, a6 ; RV64-NEXT: add a5, sp, a5 ; RV64-NEXT: addi a5, a5, 32 @@ -788,7 +806,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a4 ; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 5 +; RV64-NEXT: slli a4, a4, 4 +; RV64-NEXT: add a4, sp, a4 
+; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 40 +; RV64-NEXT: mul a4, a4, a5 ; RV64-NEXT: add a4, sp, a4 ; RV64-NEXT: addi a4, a4, 32 ; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload @@ -798,9 +822,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: # %bb.13: ; RV64-NEXT: li a7, 16 ; RV64-NEXT: .LBB16_14: -; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 @@ -812,7 +840,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: vse32.v v24, (a0) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 48 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 32 @@ -820,21 +848,21 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; RV64-NEXT: vse32.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 56 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a2, a2, 6 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 32 ; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse32.v v8, (a1) ; RV64-NEXT: addi a0, a0, 384 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a2, 72 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 32 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 72 +; RV64-NEXT: li a1, 80 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: .cfi_def_cfa sp, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll index b2279dca45d8d..88ea53c38d01d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll @@ -11,8 +11,8 @@ define <8 x i7> @vdiv_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll index dfd509062ccf7..8604b3834b515 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll @@ -57,8 +57,8 @@ define <1 x i1> @fcmp_ogt_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -73,8 +73,8 @@ define <1 x i1> @fcmp_ogt_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -102,8 +102,8 @@ define <1 x i1> @fcmp_oge_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -118,8 +118,8 @@ define <1 x i1> @fcmp_oge_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -147,8 +147,8 @@ define <1 x i1> @fcmp_olt_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -163,8 +163,8 @@ define <1 x i1> @fcmp_olt_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -192,8 +192,8 @@ define <1 x i1> @fcmp_ole_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -208,8 +208,8 @@ define <1 x i1> @fcmp_ole_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -388,8 +388,8 @@ define <1 x i1> @fcmp_ugt_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -405,8 +405,8 @@ define <1 x i1> @fcmp_ugt_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -436,8 +436,8 @@ define <1 x i1> 
@fcmp_uge_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -453,8 +453,8 @@ define <1 x i1> @fcmp_uge_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -484,8 +484,8 @@ define <1 x i1> @fcmp_ult_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -501,8 +501,8 @@ define <1 x i1> @fcmp_ult_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -532,8 +532,8 @@ define <1 x i1> @fcmp_ule_vf_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -549,8 +549,8 @@ define <1 x i1> @fcmp_ule_fv_v1f16(<1 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -690,8 +690,8 @@ define <2 x i1> @fcmp_ogt_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -706,8 +706,8 @@ define <2 x i1> @fcmp_ogt_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -735,8 +735,8 @@ define <2 x i1> @fcmp_oge_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, 
v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -751,8 +751,8 @@ define <2 x i1> @fcmp_oge_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -780,8 +780,8 @@ define <2 x i1> @fcmp_olt_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -796,8 +796,8 @@ define <2 x i1> @fcmp_olt_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -825,8 +825,8 @@ define <2 x i1> @fcmp_ole_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -841,8 +841,8 @@ define <2 x i1> @fcmp_ole_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1021,8 +1021,8 @@ define <2 x i1> @fcmp_ugt_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1038,8 +1038,8 @@ define <2 x i1> @fcmp_ugt_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1069,8 +1069,8 @@ define <2 x i1> @fcmp_uge_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1086,8 +1086,8 @@ define <2 x i1> @fcmp_uge_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: 
vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1117,8 +1117,8 @@ define <2 x i1> @fcmp_ult_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1134,8 +1134,8 @@ define <2 x i1> @fcmp_ult_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1165,8 +1165,8 @@ define <2 x i1> @fcmp_ule_vf_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1182,8 +1182,8 @@ define <2 x i1> @fcmp_ule_fv_v2f16(<2 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1323,8 +1323,8 @@ define <4 x i1> @fcmp_ogt_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1339,8 +1339,8 @@ define <4 x i1> @fcmp_ogt_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1368,8 +1368,8 @@ define <4 x i1> @fcmp_oge_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1384,8 +1384,8 @@ define <4 x i1> @fcmp_oge_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1413,8 +1413,8 @@ define <4 x i1> @fcmp_olt_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1429,8 +1429,8 @@ define <4 x i1> @fcmp_olt_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1458,8 +1458,8 @@ define <4 x i1> @fcmp_ole_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1474,8 +1474,8 @@ define <4 x i1> @fcmp_ole_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1654,8 +1654,8 @@ define <4 x i1> @fcmp_ugt_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1671,8 +1671,8 @@ define <4 x i1> @fcmp_ugt_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1702,8 +1702,8 @@ define <4 x i1> @fcmp_uge_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1719,8 +1719,8 @@ define <4 x i1> @fcmp_uge_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1750,8 +1750,8 @@ define <4 x i1> @fcmp_ult_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1767,8 +1767,8 @@ 
define <4 x i1> @fcmp_ult_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1798,8 +1798,8 @@ define <4 x i1> @fcmp_ule_vf_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1815,8 +1815,8 @@ define <4 x i1> @fcmp_ule_fv_v4f16(<4 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -1956,8 +1956,8 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -1972,8 +1972,8 @@ define <8 x i1> @fcmp_ogt_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2001,8 +2001,8 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2017,8 +2017,8 @@ define <8 x i1> @fcmp_oge_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2046,8 +2046,8 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2062,8 +2062,8 @@ define <8 x i1> @fcmp_olt_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 
; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2091,8 +2091,8 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2107,8 +2107,8 @@ define <8 x i1> @fcmp_ole_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -2287,8 +2287,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2304,8 +2304,8 @@ define <8 x i1> @fcmp_ugt_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2335,8 +2335,8 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2352,8 +2352,8 @@ define <8 x i1> @fcmp_uge_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2383,8 +2383,8 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2400,8 +2400,8 @@ define <8 x i1> @fcmp_ult_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2431,8 +2431,8 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, 
v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2448,8 +2448,8 @@ define <8 x i1> @fcmp_ule_fv_v8f16(<8 x half> %va, half %b) nounwind strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -2577,8 +2577,8 @@ define <16 x i1> @fcmp_ogt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2593,8 +2593,8 @@ define <16 x i1> @fcmp_ogt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2611,8 +2611,8 @@ define <16 x i1> @fcmp_ogt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2628,8 +2628,8 @@ define <16 x i1> @fcmp_oge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2644,8 +2644,8 @@ define <16 x i1> @fcmp_oge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2662,8 +2662,8 @@ define <16 x i1> @fcmp_oge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2679,8 +2679,8 @@ define <16 x i1> @fcmp_olt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, 
v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2695,8 +2695,8 @@ define <16 x i1> @fcmp_olt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2713,8 +2713,8 @@ define <16 x i1> @fcmp_olt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2730,8 +2730,8 @@ define <16 x i1> @fcmp_ole_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2746,8 +2746,8 @@ define <16 x i1> @fcmp_ole_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2764,8 +2764,8 @@ define <16 x i1> @fcmp_ole_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -2781,12 +2781,12 @@ define <16 x i1> @fcmp_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2799,12 +2799,12 @@ define <16 x i1> @fcmp_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, 
fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2819,12 +2819,12 @@ define <16 x i1> @fcmp_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2880,12 +2880,12 @@ define <16 x i1> @fcmp_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2898,12 +2898,12 @@ define <16 x i1> @fcmp_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2918,12 +2918,12 @@ define <16 x i1> @fcmp_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; 
CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2937,8 +2937,8 @@ define <16 x i1> @fcmp_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -2953,8 +2953,8 @@ define <16 x i1> @fcmp_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -2971,8 +2971,8 @@ define <16 x i1> @fcmp_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -2988,8 +2988,8 @@ define <16 x i1> @fcmp_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3004,8 +3004,8 @@ define <16 x i1> @fcmp_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -3022,8 +3022,8 @@ define <16 x i1> @fcmp_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -3039,8 +3039,8 @@ define <16 x i1> @fcmp_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3055,8 +3055,8 @@ define <16 x i1> @fcmp_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v 
v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -3073,8 +3073,8 @@ define <16 x i1> @fcmp_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -3090,8 +3090,8 @@ define <16 x i1> @fcmp_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3106,8 +3106,8 @@ define <16 x i1> @fcmp_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -3124,8 +3124,8 @@ define <16 x i1> @fcmp_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -3256,8 +3256,8 @@ define <32 x i1> @fcmp_ogt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -3273,8 +3273,8 @@ define <32 x i1> @fcmp_ogt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3292,8 +3292,8 @@ define <32 x i1> @fcmp_ogt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3310,8 +3310,8 @@ define <32 x i1> @fcmp_oge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, 
v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -3327,8 +3327,8 @@ define <32 x i1> @fcmp_oge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3346,8 +3346,8 @@ define <32 x i1> @fcmp_oge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3364,8 +3364,8 @@ define <32 x i1> @fcmp_olt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -3381,8 +3381,8 @@ define <32 x i1> @fcmp_olt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3400,8 +3400,8 @@ define <32 x i1> @fcmp_olt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3418,8 +3418,8 @@ define <32 x i1> @fcmp_ole_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -3435,8 +3435,8 @@ define <32 x i1> @fcmp_ole_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3454,8 +3454,8 @@ define <32 x i1> @fcmp_ole_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; 
CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -3472,12 +3472,12 @@ define <32 x i1> @fcmp_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3491,12 +3491,12 @@ define <32 x i1> @fcmp_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3512,12 +3512,12 @@ define <32 x i1> @fcmp_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3577,12 +3577,12 @@ define <32 x i1> @fcmp_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3596,13 +3596,13 @@ define <32 x i1> @fcmp_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: 
vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 -; CHECK-NEXT: ret +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %splat, metadata !"ueq", metadata !"fpexcept.strict") strictfp @@ -3617,12 +3617,12 @@ define <32 x i1> @fcmp_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3637,8 +3637,8 @@ define <32 x i1> @fcmp_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -3654,8 +3654,8 @@ define <32 x i1> @fcmp_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3673,8 +3673,8 @@ define <32 x i1> @fcmp_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3691,8 +3691,8 @@ define <32 x i1> @fcmp_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -3708,8 +3708,8 @@ define <32 x i1> @fcmp_uge_vf_v32f16(<32 x half> %va, half %b) nounwind 
strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3727,8 +3727,8 @@ define <32 x i1> @fcmp_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3745,8 +3745,8 @@ define <32 x i1> @fcmp_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -3762,8 +3762,8 @@ define <32 x i1> @fcmp_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3781,8 +3781,8 @@ define <32 x i1> @fcmp_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3799,8 +3799,8 @@ define <32 x i1> @fcmp_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -3816,8 +3816,8 @@ define <32 x i1> @fcmp_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3835,8 +3835,8 @@ define <32 x i1> @fcmp_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -3981,8 +3981,8 @@ define <1 x i1> @fcmp_ogt_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli 
zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -3997,8 +3997,8 @@ define <1 x i1> @fcmp_ogt_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4026,8 +4026,8 @@ define <1 x i1> @fcmp_oge_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4042,8 +4042,8 @@ define <1 x i1> @fcmp_oge_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4071,8 +4071,8 @@ define <1 x i1> @fcmp_olt_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4087,8 +4087,8 @@ define <1 x i1> @fcmp_olt_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4116,8 +4116,8 @@ define <1 x i1> @fcmp_ole_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4132,8 +4132,8 @@ define <1 x i1> @fcmp_ole_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4312,8 +4312,8 @@ define <1 x i1> @fcmp_ugt_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4329,8 +4329,8 @@ define <1 x i1> @fcmp_ugt_fv_v1f32(<1 x float> %va, 
float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4360,8 +4360,8 @@ define <1 x i1> @fcmp_uge_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4377,8 +4377,8 @@ define <1 x i1> @fcmp_uge_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4408,8 +4408,8 @@ define <1 x i1> @fcmp_ult_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4425,8 +4425,8 @@ define <1 x i1> @fcmp_ult_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4456,8 +4456,8 @@ define <1 x i1> @fcmp_ule_vf_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4473,8 +4473,8 @@ define <1 x i1> @fcmp_ule_fv_v1f32(<1 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4614,8 +4614,8 @@ define <2 x i1> @fcmp_ogt_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4630,8 +4630,8 @@ define <2 x i1> @fcmp_ogt_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; 
CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4659,8 +4659,8 @@ define <2 x i1> @fcmp_oge_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4675,8 +4675,8 @@ define <2 x i1> @fcmp_oge_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4704,8 +4704,8 @@ define <2 x i1> @fcmp_olt_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4720,8 +4720,8 @@ define <2 x i1> @fcmp_olt_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4749,8 +4749,8 @@ define <2 x i1> @fcmp_ole_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4765,8 +4765,8 @@ define <2 x i1> @fcmp_ole_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4945,8 +4945,8 @@ define <2 x i1> @fcmp_ugt_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4962,8 +4962,8 @@ define <2 x i1> @fcmp_ugt_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4993,8 +4993,8 @@ define <2 x i1> @fcmp_uge_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, 
fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5010,8 +5010,8 @@ define <2 x i1> @fcmp_uge_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5041,8 +5041,8 @@ define <2 x i1> @fcmp_ult_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5058,8 +5058,8 @@ define <2 x i1> @fcmp_ult_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5089,8 +5089,8 @@ define <2 x i1> @fcmp_ule_vf_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5106,8 +5106,8 @@ define <2 x i1> @fcmp_ule_fv_v2f32(<2 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5247,8 +5247,8 @@ define <4 x i1> @fcmp_ogt_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5263,8 +5263,8 @@ define <4 x i1> @fcmp_ogt_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5292,8 +5292,8 @@ define <4 x i1> @fcmp_oge_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5308,8 +5308,8 @@ define <4 x i1> @fcmp_oge_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 
4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5337,8 +5337,8 @@ define <4 x i1> @fcmp_olt_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5353,8 +5353,8 @@ define <4 x i1> @fcmp_olt_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5382,8 +5382,8 @@ define <4 x i1> @fcmp_ole_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5398,8 +5398,8 @@ define <4 x i1> @fcmp_ole_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -5578,8 +5578,8 @@ define <4 x i1> @fcmp_ugt_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5595,8 +5595,8 @@ define <4 x i1> @fcmp_ugt_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5626,8 +5626,8 @@ define <4 x i1> @fcmp_uge_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5643,8 +5643,8 @@ define <4 x i1> @fcmp_uge_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5674,8 +5674,8 @@ define <4 x i1> @fcmp_ult_vf_v4f32(<4 
x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5691,8 +5691,8 @@ define <4 x i1> @fcmp_ult_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5722,8 +5722,8 @@ define <4 x i1> @fcmp_ule_vf_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5739,8 +5739,8 @@ define <4 x i1> @fcmp_ule_fv_v4f32(<4 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5868,8 +5868,8 @@ define <8 x i1> @fcmp_ogt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -5884,8 +5884,8 @@ define <8 x i1> @fcmp_ogt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5902,8 +5902,8 @@ define <8 x i1> @fcmp_ogt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5919,8 +5919,8 @@ define <8 x i1> @fcmp_oge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -5935,8 +5935,8 @@ define <8 x i1> @fcmp_oge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, 
v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5953,8 +5953,8 @@ define <8 x i1> @fcmp_oge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5970,8 +5970,8 @@ define <8 x i1> @fcmp_olt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -5986,8 +5986,8 @@ define <8 x i1> @fcmp_olt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -6004,8 +6004,8 @@ define <8 x i1> @fcmp_olt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -6021,8 +6021,8 @@ define <8 x i1> @fcmp_ole_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6037,8 +6037,8 @@ define <8 x i1> @fcmp_ole_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -6055,8 +6055,8 @@ define <8 x i1> @fcmp_ole_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -6072,12 +6072,12 @@ define <8 x i1> @fcmp_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, 
v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6090,12 +6090,12 @@ define <8 x i1> @fcmp_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6110,12 +6110,12 @@ define <8 x i1> @fcmp_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6171,12 +6171,12 @@ define <8 x i1> @fcmp_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6189,12 +6189,12 @@ define <8 x i1> @fcmp_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; 
CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6209,12 +6209,12 @@ define <8 x i1> @fcmp_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6228,8 +6228,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6244,8 +6244,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6262,8 +6262,8 @@ define <8 x i1> @fcmp_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6279,8 +6279,8 @@ define <8 x i1> @fcmp_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6295,8 +6295,8 @@ define <8 x i1> @fcmp_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6313,8 +6313,8 @@ define <8 x i1> @fcmp_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: 
vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6330,8 +6330,8 @@ define <8 x i1> @fcmp_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6346,8 +6346,8 @@ define <8 x i1> @fcmp_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6364,8 +6364,8 @@ define <8 x i1> @fcmp_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6381,8 +6381,8 @@ define <8 x i1> @fcmp_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6397,8 +6397,8 @@ define <8 x i1> @fcmp_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6415,8 +6415,8 @@ define <8 x i1> @fcmp_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -6543,8 +6543,8 @@ define <16 x i1> @fcmp_ogt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6559,8 +6559,8 @@ define <16 x i1> @fcmp_ogt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: 
vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6577,8 +6577,8 @@ define <16 x i1> @fcmp_ogt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6594,8 +6594,8 @@ define <16 x i1> @fcmp_oge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6610,8 +6610,8 @@ define <16 x i1> @fcmp_oge_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6628,8 +6628,8 @@ define <16 x i1> @fcmp_oge_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6645,8 +6645,8 @@ define <16 x i1> @fcmp_olt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6661,8 +6661,8 @@ define <16 x i1> @fcmp_olt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6679,8 +6679,8 @@ define <16 x i1> @fcmp_olt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6696,8 +6696,8 @@ define <16 x i1> @fcmp_ole_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; 
CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6712,8 +6712,8 @@ define <16 x i1> @fcmp_ole_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6730,8 +6730,8 @@ define <16 x i1> @fcmp_ole_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6747,12 +6747,12 @@ define <16 x i1> @fcmp_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6765,12 +6765,12 @@ define <16 x i1> @fcmp_one_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6785,12 +6785,12 @@ define <16 x i1> @fcmp_one_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer 
@@ -6846,12 +6846,12 @@ define <16 x i1> @fcmp_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6864,12 +6864,12 @@ define <16 x i1> @fcmp_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6884,12 +6884,12 @@ define <16 x i1> @fcmp_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6903,8 +6903,8 @@ define <16 x i1> @fcmp_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -6919,8 +6919,8 @@ define <16 x i1> @fcmp_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6937,8 +6937,8 @@ define <16 x i1> @fcmp_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, 
v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6954,8 +6954,8 @@ define <16 x i1> @fcmp_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -6970,8 +6970,8 @@ define <16 x i1> @fcmp_uge_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6988,8 +6988,8 @@ define <16 x i1> @fcmp_uge_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -7005,8 +7005,8 @@ define <16 x i1> @fcmp_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7021,8 +7021,8 @@ define <16 x i1> @fcmp_ult_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -7039,8 +7039,8 @@ define <16 x i1> @fcmp_ult_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -7056,8 +7056,8 @@ define <16 x i1> @fcmp_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7072,8 +7072,8 @@ define <16 x i1> @fcmp_ule_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv 
v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -7090,8 +7090,8 @@ define <16 x i1> @fcmp_ule_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -7230,8 +7230,8 @@ define <1 x i1> @fcmp_ogt_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7246,8 +7246,8 @@ define <1 x i1> @fcmp_ogt_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7275,8 +7275,8 @@ define <1 x i1> @fcmp_oge_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7291,8 +7291,8 @@ define <1 x i1> @fcmp_oge_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7320,8 +7320,8 @@ define <1 x i1> @fcmp_olt_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7336,8 +7336,8 @@ define <1 x i1> @fcmp_olt_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7365,8 +7365,8 @@ define <1 x i1> @fcmp_ole_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7381,8 +7381,8 @@ define <1 x i1> @fcmp_ole_fv_v1f64(<1 x double> %va, 
double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7561,8 +7561,8 @@ define <1 x i1> @fcmp_ugt_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7578,8 +7578,8 @@ define <1 x i1> @fcmp_ugt_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7609,8 +7609,8 @@ define <1 x i1> @fcmp_uge_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7626,8 +7626,8 @@ define <1 x i1> @fcmp_uge_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7657,8 +7657,8 @@ define <1 x i1> @fcmp_ult_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7674,8 +7674,8 @@ define <1 x i1> @fcmp_ult_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7705,8 +7705,8 @@ define <1 x i1> @fcmp_ule_vf_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7722,8 +7722,8 @@ define <1 x i1> @fcmp_ule_fv_v1f64(<1 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; 
CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7863,8 +7863,8 @@ define <2 x i1> @fcmp_ogt_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7879,8 +7879,8 @@ define <2 x i1> @fcmp_ogt_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7908,8 +7908,8 @@ define <2 x i1> @fcmp_oge_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7924,8 +7924,8 @@ define <2 x i1> @fcmp_oge_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7953,8 +7953,8 @@ define <2 x i1> @fcmp_olt_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7969,8 +7969,8 @@ define <2 x i1> @fcmp_olt_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7998,8 +7998,8 @@ define <2 x i1> @fcmp_ole_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -8014,8 +8014,8 @@ define <2 x i1> @fcmp_ole_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -8194,8 +8194,8 @@ define <2 x i1> @fcmp_ugt_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; 
CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8211,8 +8211,8 @@ define <2 x i1> @fcmp_ugt_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8242,8 +8242,8 @@ define <2 x i1> @fcmp_uge_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8259,8 +8259,8 @@ define <2 x i1> @fcmp_uge_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8290,8 +8290,8 @@ define <2 x i1> @fcmp_ult_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8307,8 +8307,8 @@ define <2 x i1> @fcmp_ult_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8338,8 +8338,8 @@ define <2 x i1> @fcmp_ule_vf_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8355,8 +8355,8 @@ define <2 x i1> @fcmp_ule_fv_v2f64(<2 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -8484,8 +8484,8 @@ define <4 x i1> @fcmp_ogt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8500,8 +8500,8 @@ define <4 x i1> @fcmp_ogt_vf_v4f64(<4 x double> %va, double 
%b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8518,8 +8518,8 @@ define <4 x i1> @fcmp_ogt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8535,8 +8535,8 @@ define <4 x i1> @fcmp_oge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8551,8 +8551,8 @@ define <4 x i1> @fcmp_oge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8569,8 +8569,8 @@ define <4 x i1> @fcmp_oge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8586,8 +8586,8 @@ define <4 x i1> @fcmp_olt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8602,8 +8602,8 @@ define <4 x i1> @fcmp_olt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8620,8 +8620,8 @@ define <4 x i1> @fcmp_olt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8637,8 +8637,8 @@ define <4 x i1> @fcmp_ole_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, 
e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8653,8 +8653,8 @@ define <4 x i1> @fcmp_ole_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8671,8 +8671,8 @@ define <4 x i1> @fcmp_ole_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8688,12 +8688,12 @@ define <4 x i1> @fcmp_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8706,12 +8706,12 @@ define <4 x i1> @fcmp_one_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8726,12 +8726,12 @@ define <4 x i1> @fcmp_one_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <4 
x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8787,12 +8787,12 @@ define <4 x i1> @fcmp_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8805,12 +8805,12 @@ define <4 x i1> @fcmp_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8825,12 +8825,12 @@ define <4 x i1> @fcmp_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8844,8 +8844,8 @@ define <4 x i1> @fcmp_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -8860,8 +8860,8 @@ define <4 x i1> @fcmp_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8878,8 +8878,8 @@ define <4 x i1> 
@fcmp_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8895,8 +8895,8 @@ define <4 x i1> @fcmp_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v10, v10 ; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -8911,8 +8911,8 @@ define <4 x i1> @fcmp_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8929,8 +8929,8 @@ define <4 x i1> @fcmp_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8946,8 +8946,8 @@ define <4 x i1> @fcmp_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -8962,8 +8962,8 @@ define <4 x i1> @fcmp_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8980,8 +8980,8 @@ define <4 x i1> @fcmp_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8997,8 +8997,8 @@ define <4 x i1> @fcmp_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v12, v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9013,8 +9013,8 @@ define <4 x i1> @fcmp_ule_vf_v4f64(<4 x double> %va, 
double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -9031,8 +9031,8 @@ define <4 x i1> @fcmp_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -9159,8 +9159,8 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9175,8 +9175,8 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9193,8 +9193,8 @@ define <8 x i1> @fcmp_ogt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9210,8 +9210,8 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9226,8 +9226,8 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9244,8 +9244,8 @@ define <8 x i1> @fcmp_oge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9261,8 +9261,8 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli 
zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9277,8 +9277,8 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9295,8 +9295,8 @@ define <8 x i1> @fcmp_olt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9312,8 +9312,8 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9328,8 +9328,8 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9346,8 +9346,8 @@ define <8 x i1> @fcmp_ole_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -9363,12 +9363,12 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9381,12 +9381,12 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: 
vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9401,12 +9401,12 @@ define <8 x i1> @fcmp_one_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9462,12 +9462,12 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9480,12 +9480,12 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9500,12 +9500,12 @@ define <8 x i1> @fcmp_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm 
v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9519,8 +9519,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9535,8 +9535,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9553,8 +9553,8 @@ define <8 x i1> @fcmp_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9570,8 +9570,8 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v12, v12 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9586,8 +9586,8 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9604,8 +9604,8 @@ define <8 x i1> @fcmp_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9621,8 +9621,8 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9637,8 +9637,8 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x 
double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9655,8 +9655,8 @@ define <8 x i1> @fcmp_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9672,8 +9672,8 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v16, v8, v8 ; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v16, v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9688,8 +9688,8 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9706,8 +9706,8 @@ define <8 x i1> @fcmp_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index fec54b36042fa..e310349dd93f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -11,8 +11,8 @@ define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index ea75742ca6e43..612f39b21a3fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -11,8 +11,8 @@ define <8 x i7> @vmin_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 83a195a66a502..416f34d3ccdd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -298,11 +298,11 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -2052,13 +2052,13 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t ; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: vmv8r.v v8, v24 @@ -2077,9 +2077,9 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: ret @@ -2103,13 +2103,13 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2134,9 +2134,9 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2161,13 +2161,13 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; 
RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2193,9 +2193,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2225,9 +2225,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2251,9 +2251,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2283,9 +2283,9 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2310,9 +2310,9 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2341,9 +2341,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2369,9 +2369,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, 
a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2401,9 +2401,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2427,9 +2427,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2454,13 +2454,13 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2484,9 +2484,9 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2510,13 +2510,13 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2540,9 +2540,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2567,13 +2567,13 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; 
RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2597,9 +2597,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2618,16 +2618,16 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: vnsrl.wi v16, v8, 0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vslideup.vi v16, v24, 16 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: sltu a2, a1, a3 ; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 @@ -2656,9 +2656,9 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 8e2e8f3fb0dec..dc55bceae6eb7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -374,12 +374,12 @@ define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: addi a2, a1, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -396,36 +396,36 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: bltu a2, a4, .LBB32_2 -; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li 
a3, 32 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a2, a3, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a4, 32 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: addi a5, a4, -16 +; CHECK-NEXT: addi a3, a1, 128 ; CHECK-NEXT: addi a7, a2, -32 -; CHECK-NEXT: sltu a3, a3, a5 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a6, a3, a5 -; CHECK-NEXT: sltu a3, a2, a7 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a5, a3, a7 -; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: sltu a4, a4, a5 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a6, a4, a5 +; CHECK-NEXT: sltu a4, a2, a7 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a5, a4, a7 +; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: bltu a5, a3, .LBB32_4 +; CHECK-NEXT: bltu a5, a4, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB32_4: ; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a4), v0.t +; CHECK-NEXT: vle64.v v16, (a3), v0.t +; CHECK-NEXT: addi a3, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a4, a1, 256 ; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a4), v0.t -; CHECK-NEXT: bltu a2, a3, .LBB32_6 +; CHECK-NEXT: vle64.v v24, (a3), v0.t +; CHECK-NEXT: bltu a2, a4, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_6: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index 6394542479d1b..9c16454e5cc84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1373,9 +1373,9 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma @@ -1406,9 +1406,9 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index f7e4716d2c847..b58a06e26334d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1756,13 +1756,13 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: addi a0, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: 
vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: ret @@ -1792,9 +1792,9 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV64-NEXT: addi a0, a2, -16 ; RV64-NEXT: sltu a1, a2, a0 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1826,13 +1826,13 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1874,9 +1874,9 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 @@ -1912,13 +1912,13 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1960,9 +1960,9 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 @@ -1999,13 +1999,13 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; 
RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -2047,9 +2047,9 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index d30e8b46e6df2..d3a8e8548f5b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -304,12 +304,12 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: addi a2, a1, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll index b3d35a51280ac..258248499a5b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll @@ -11,8 +11,8 @@ define <8 x i7> @vrem_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vrem.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index 9be6b92699e0d..79990ccb351c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -213,13 +213,13 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 @@ -412,9 +412,9 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: 
and a0, a1, a0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -436,36 +436,26 @@ define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vle64.v v16, (a0) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -599,9 +589,9 @@ define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> % ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 984bc5b2c7352..cf590c25efc65 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -19,11 +19,11 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: slli a4, a2, 27 -; RV32-NEXT: srli a2, a2, 5 ; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslide1down.vx v10, v10, a4 +; RV32-NEXT: srli a2, a2, 5 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -50,11 +50,11 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: 
vslide1down.vx v10, v10, a4 ; RV64-NEXT: slli a4, a2, 59 -; RV64-NEXT: srli a2, a2, 5 ; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslide1down.vx v10, v10, a4 +; RV64-NEXT: srli a2, a2, 5 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -89,11 +89,11 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: slli a4, a2, 27 -; RV32-NEXT: srli a2, a2, 5 ; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslide1down.vx v10, v10, a4 +; RV32-NEXT: srli a2, a2, 5 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -120,11 +120,11 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: slli a4, a2, 59 -; RV64-NEXT: srli a2, a2, 5 ; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslide1down.vx v10, v10, a4 +; RV64-NEXT: srli a2, a2, 5 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -160,11 +160,11 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: slli a3, a1, 27 -; RV32-NEXT: srli a1, a1, 5 ; RV32-NEXT: srli a0, a0, 31 ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: srli a1, a1, 5 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -191,11 +191,11 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: slli a3, a1, 59 -; RV64-NEXT: srli a1, a1, 5 ; RV64-NEXT: srli a0, a0, 63 ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vslide1down.vx v10, v10, a3 +; RV64-NEXT: srli a1, a1, 5 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -230,11 +230,11 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: slli a4, a2, 27 -; RV32-NEXT: srli a2, a2, 5 ; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslide1down.vx v10, v10, a4 +; RV32-NEXT: srli a2, a2, 5 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -261,11 +261,11 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: slli a4, a2, 59 -; RV64-NEXT: srli a2, a2, 5 ; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslide1down.vx v10, v10, a4 +; RV64-NEXT: srli a2, a2, 5 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -300,11 +300,11 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: srli a3, a3, 31 ; 
RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: slli a3, a1, 27 -; RV32-NEXT: srli a1, a1, 5 ; RV32-NEXT: srli a0, a0, 31 ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: srli a1, a1, 5 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -331,11 +331,11 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: slli a3, a1, 59 -; RV64-NEXT: srli a1, a1, 5 ; RV64-NEXT: srli a0, a0, 63 ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vslide1down.vx v10, v10, a3 +; RV64-NEXT: srli a1, a1, 5 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -371,11 +371,11 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: slli a3, a1, 27 -; RV32-NEXT: srli a1, a1, 5 ; RV32-NEXT: srli a0, a0, 31 ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: srli a1, a1, 5 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -402,11 +402,11 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: slli a3, a1, 59 -; RV64-NEXT: srli a1, a1, 5 ; RV64-NEXT: srli a0, a0, 63 ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vslide1down.vx v10, v10, a3 +; RV64-NEXT: srli a1, a1, 5 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma @@ -427,8 +427,8 @@ define void @vselect_vv_v8i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -444,8 +444,8 @@ define void @vselect_vx_v8i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -462,8 +462,8 @@ define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -478,8 +478,8 @@ define void @vselect_vv_v8f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -495,8 +495,8 @@ define void @vselect_vx_v8f32(float %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; 
CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -513,8 +513,8 @@ define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vfpzero_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -529,8 +529,8 @@ define void @vselect_vv_v16i16(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -546,8 +546,8 @@ define void @vselect_vx_v16i16(i16 signext %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -564,8 +564,8 @@ define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vi_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 4, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret @@ -581,8 +581,8 @@ define void @vselect_vv_v32f16(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a4, 32 ; CHECK-NEXT: vsetvli zero, a4, e16, m4, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -599,8 +599,8 @@ define void @vselect_vx_v32f16(half %a, ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret @@ -618,8 +618,8 @@ define void @vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll index 180fafa9659b1..3a07df74c06b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll @@ -11,8 +11,8 @@ define <8 x i7> @vsra_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t ; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll index 7bac239cfffea..47afc8b073563 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll @@ -5,13 +5,12 @@ define <8 x i64> @vwadd_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) { ; CHECK-LABEL: vwadd_wv_mask_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv2r.v v16, v8 ; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 -; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu -; CHECK-NEXT: vwadd.wv v8, v8, v16, v0.t +; CHECK-NEXT: vwadd.wv v12, v12, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %mask = icmp slt <8 x i32> %x, %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -23,13 +22,12 @@ define <8 x i64> @vwadd_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) { define <8 x i64> @vwaddu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) { ; CHECK-LABEL: vwaddu_wv_mask_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv2r.v v16, v8 ; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 -; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu -; CHECK-NEXT: vwaddu.wv v8, v8, v16, v0.t +; CHECK-NEXT: vwaddu.wv v12, v12, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %mask = icmp slt <8 x i32> %x, %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -60,13 +58,12 @@ define <8 x i64> @vwaddu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) { define <8 x i64> @vwadd_wv_mask_v8i32_commutative(<8 x i32> %x, <8 x i64> %y) { ; CHECK-LABEL: vwadd_wv_mask_v8i32_commutative: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv2r.v v16, v8 ; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 -; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu -; CHECK-NEXT: vwadd.wv v8, v8, v16, v0.t +; CHECK-NEXT: vwadd.wv v12, v12, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %mask = icmp slt <8 x i32> %x, %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll index eafea7292a54b..446a24b973e27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll @@ -5,13 +5,12 @@ define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) { ; CHECK-LABEL: vwsub_wv_mask_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv2r.v v16, v8 ; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 -; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu -; CHECK-NEXT: vwsub.wv v8, v8, v16, v0.t +; CHECK-NEXT: vwsub.wv v12, v12, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %mask = icmp slt <8 x i32> %x, %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -23,13 +22,12 @@ define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) { define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) { ; CHECK-LABEL: vwsubu_wv_mask_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv2r.v v16, v8 ; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, 
a0 -; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu -; CHECK-NEXT: vwsubu.wv v8, v8, v16, v0.t +; CHECK-NEXT: vwsubu.wv v12, v12, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %mask = icmp slt <8 x i32> %x, %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll index 4512d809995a4..af8958c4cfc16 100644 --- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -405,8 +405,8 @@ define @ceil_nxv1f32_to_si8( %x) { ; RV32-NEXT: vfabs.v v9, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -424,8 +424,8 @@ define @ceil_nxv1f32_to_si8( %x) { ; RV64-NEXT: vfabs.v v9, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,8 +448,8 @@ define @ceil_nxv1f32_to_ui8( %x) { ; RV32-NEXT: vfabs.v v9, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -467,8 +467,8 @@ define @ceil_nxv1f32_to_ui8( %x) { ; RV64-NEXT: vfabs.v v9, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -631,8 +631,8 @@ define @ceil_nxv4f32_to_si8( %x) { ; RV32-NEXT: vfabs.v v10, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -650,8 +650,8 @@ define @ceil_nxv4f32_to_si8( %x) { ; RV64-NEXT: vfabs.v v10, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -674,8 +674,8 @@ define @ceil_nxv4f32_to_ui8( %x) { ; RV32-NEXT: vfabs.v v10, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -693,8 +693,8 @@ define @ceil_nxv4f32_to_ui8( %x) { ; RV64-NEXT: vfabs.v v10, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 6ebb03ff0297e..95d157b79a8d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -22,12 +22,12 @@ define @vp_floor_nxv1bf16( %va, 
@vp_floor_nxv1bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -76,12 +76,12 @@ define @vp_floor_nxv2bf16( %va, @vp_floor_nxv2bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ define @vp_floor_nxv4bf16( %va, @vp_floor_nxv4bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -184,12 +184,12 @@ define @vp_floor_nxv8bf16( %va, @vp_floor_nxv8bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -238,12 +238,12 @@ define @vp_floor_nxv16bf16( %va, @vp_floor_nxv16bf16_unmasked( @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16_unmasked( @vp_floor_nxv32bf16_unmasked( @vp_floor_nxv32bf16_unmasked( @vp_floor_nxv1f16( %va, @vp_floor_nxv1f16( %va, @vp_floor_nxv1f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -471,11 +471,11 @@ define @vp_floor_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -497,9 +497,9 @@ define @vp_floor_nxv2f16( %va, @vp_floor_nxv2f16( %va, @vp_floor_nxv2f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: 
vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -557,11 +557,11 @@ define @vp_floor_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -583,9 +583,9 @@ define @vp_floor_nxv4f16( %va, @vp_floor_nxv4f16( %va, @vp_floor_nxv4f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -643,11 +643,11 @@ define @vp_floor_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -665,14 +665,14 @@ declare @llvm.vp.floor.nxv8f16(, @vp_floor_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -689,12 +689,12 @@ define @vp_floor_nxv8f16( %va, @vp_floor_nxv8f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -731,11 +731,11 @@ define @vp_floor_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -753,14 +753,14 @@ declare @llvm.vp.floor.nxv16f16(, @vp_floor_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v 
v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -777,12 +777,12 @@ define @vp_floor_nxv16f16( %va, @vp_floor_nxv16f16_unmasked( %va ; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -819,11 +819,11 @@ define @vp_floor_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -841,14 +841,14 @@ declare @llvm.vp.floor.nxv32f16(, @vp_floor_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -864,6 +864,7 @@ define @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16_unmasked( %va ; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -941,11 +941,12 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 2 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v6, v7 @@ -958,11 +959,10 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v6, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: 
vfsgnj.vv v24, v16, v24, v0.t @@ -975,10 +975,10 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1000,9 +1000,9 @@ define @vp_floor_nxv1f32( %va, @vp_floor_nxv1f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1042,9 +1042,9 @@ define @vp_floor_nxv2f32( %va, @vp_floor_nxv2f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1081,13 +1081,13 @@ define @vp_floor_nxv4f32( %va, @vp_floor_nxv4f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1125,13 +1125,13 @@ define @vp_floor_nxv8f32( %va, @vp_floor_nxv8f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1169,13 +1169,13 @@ define @vp_floor_nxv16f32( %va, @vp_floor_nxv16f32_unmasked( % ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1216,9 +1216,9 @@ define @vp_floor_nxv1f64( %va, @vp_floor_nxv1f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1254,14 +1254,14 @@ declare @llvm.vp.floor.nxv2f64(, @vp_floor_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1281,8 +1281,8 @@ define 
@vp_floor_nxv2f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1298,14 +1298,14 @@ declare @llvm.vp.floor.nxv4f64(, @vp_floor_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1325,8 +1325,8 @@ define @vp_floor_nxv4f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1342,14 +1342,14 @@ declare @llvm.vp.floor.nxv7f64(, @vp_floor_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1369,8 +1369,8 @@ define @vp_floor_nxv7f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1386,14 +1386,14 @@ declare @llvm.vp.floor.nxv8f64(, @vp_floor_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1413,8 +1413,8 @@ define @vp_floor_nxv8f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 
2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1438,20 +1438,20 @@ define @vp_floor_nxv16f64( %va, @vp_floor_nxv16f64( %va, @vp_floor_nxv16f64_unmasked( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: fsrmi a3, 2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1502,8 +1503,8 @@ define @vp_floor_nxv16f64_unmasked( ; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 67a0f4b56b995..f18cd4df20430 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -22,16 +22,14 @@ define @vfmax_nxv1bf16_vv( %a, @vfmax_nxv2bf16_vv( %a, @vfmax_nxv4bf16_vv( %a, @vfmax_nxv8bf16_vv( %a, @vfmax_nxv16bf16_vv( %a, @vfmax_nxv16bf16_vv( %a, @llvm.maximum.nxv32bf16(, ) define @vfmax_nxv32bf16_vv( %a, %b) nounwind { -; ZVFH-LABEL: vfmax_nxv32bf16_vv: -; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: sub sp, sp, a0 -; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv8r.v v24, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: vmv8r.v v0, v8 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v3, v16, v16 -; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: vmv1r.v v0, v3 -; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vfmax.vv v16, v0, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; 
ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v24, v24 -; ZVFH-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFH-NEXT: vfmax.vv v16, v8, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: sub sp, sp, a0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v0, v8 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmax.vv v16, v0, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: 
vfmax_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v0, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v3, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfmax.vv v16, v16, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call @llvm.maximum.nxv32bf16( %a, %b) ret %v } @@ -302,16 +203,14 @@ define @vfmax_nxv1f16_vv( %a, @vfmax_nxv2f16_vv( %a, @vfmax_nxv4f16_vv( %a, @vfmax_nxv8f16_vv( %a, @vfmax_nxv16f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv16f32_vv( %a, @vfmax_nxv8f64_vv( %a, @vfmax_nxv1f16_vv_nnana( %a, @vfmax_nxv1f16_vv_nnanb( %a, @vfmax_vv_nxv1bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv1bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -87,16 +85,14 @@ define @vfmax_vv_nxv2bf16_unmasked( % ; 
CHECK-LABEL: vfmax_vv_nxv2bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -134,15 +130,13 @@ define @vfmax_vv_nxv4bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv4bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 ; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 ; CHECK-NEXT: vfmax.vv v10, v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 @@ -181,15 +175,13 @@ define @vfmax_vv_nxv8bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv8bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 ; CHECK-NEXT: vfmax.vv v12, v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 @@ -247,9 +239,8 @@ define @vfmax_vv_nxv16bf16_unmasked( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv32bf16_unmasked( @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, 
v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -617,16 +560,14 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -687,15 +628,13 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -759,15 +698,13 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -850,9 +787,8 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -905,12 +841,7 @@ define @vfmax_vv_nxv32f16( %va, 
@vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v16, v16 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v24 ; ZVFH-NEXT: ret @@ -1043,19 +943,14 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 +; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 @@ -1066,32 +961,27 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v5, v24, v24, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v6 -; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v6, v8, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v5 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: @@ -1101,26 +991,25 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -1423,9 +1312,8 @@ define @vfmax_vv_nxv8f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -1458,26 +1346,27 @@ define @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index c907e8b60c75b..ed428fa4b79bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -22,16 +22,14 @@ define @vfmin_nxv1bf16_vv( %a, @vfmin_nxv2bf16_vv( %a, @vfmin_nxv4bf16_vv( %a, @vfmin_nxv8bf16_vv( %a, @vfmin_nxv16bf16_vv( %a, @vfmin_nxv16bf16_vv( %a, @llvm.minimum.nxv32bf16(, ) define @vfmin_nxv32bf16_vv( %a, %b) nounwind { -; ZVFH-LABEL: vfmin_nxv32bf16_vv: -; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: sub sp, sp, a0 -; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv8r.v v24, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: vmv8r.v v0, v8 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v3, v16, v16 -; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: vmv1r.v v0, v3 -; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; 
ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFH-NEXT: vfmin.vv v16, v0, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v24, v24 -; ZVFH-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFH-NEXT: vfmin.vv v16, v8, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: mv a1, a0 -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add a0, a0, a1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: sub sp, sp, a0 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v0, v8 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v24 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v0 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12 -; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v4 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmin.vv v16, v0, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; 
ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 -; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v24 -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmin_nxv32bf16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v0, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24 +; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v3, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28 +; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfmin.vv v16, v16, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24 +; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call @llvm.minimum.nxv32bf16( %a, %b) ret %v } @@ -302,16 +203,14 @@ define @vfmin_nxv1f16_vv( %a, @vfmin_nxv2f16_vv( %a, @vfmin_nxv4f16_vv( %a, @vfmin_nxv8f16_vv( %a, @vfmin_nxv16f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv16f32_vv( %a, @vfmin_nxv8f64_vv( %a, @vfmin_nxv1f16_vv_nnana( %a, @vfmin_nxv1f16_vv_nnanb( %a, @vfmin_vv_nxv1bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; 
CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -87,16 +85,14 @@ define @vfmin_vv_nxv2bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -134,15 +130,13 @@ define @vfmin_vv_nxv4bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 ; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 ; CHECK-NEXT: vfmin.vv v10, v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 @@ -181,15 +175,13 @@ define @vfmin_vv_nxv8bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 ; CHECK-NEXT: vfmin.vv v12, v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 @@ -247,9 +239,8 @@ define @vfmin_vv_nxv16bf16_unmasked( @vfmin_vv_nxv32bf16( 
%va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv32bf16_unmasked( @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -617,16 +560,14 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -687,15 +628,13 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -759,15 +698,13 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: 
vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -850,9 +787,8 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -905,12 +841,7 @@ define @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v16, v16 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v24 ; ZVFH-NEXT: ret @@ -1043,19 +943,14 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 +; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: addi a2, a2, -1 ; ZVFHMIN-NEXT: and a2, a2, a3 @@ -1066,32 +961,27 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v5, v24, v24, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v6 -; ZVFHMIN-NEXT: vmv8r.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v6, v8, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v5 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; 
ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2 ; ZVFHMIN-NEXT: # %bb.1: @@ -1101,26 +991,25 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -1423,9 +1312,8 @@ define @vfmin_vv_nxv8f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -1458,26 +1346,27 @@ define @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index 7a4695d1c25c1..c796732ec8036 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -9,10 +9,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f16( @nearbyint_nxv1f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: 
vmflt.vf v0, v9, fa5 @@ -33,10 +33,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f16( @nearbyint_nxv2f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -57,10 +57,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f16( @nearbyint_nxv4f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -81,10 +81,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f16( @nearbyint_nxv8f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -105,10 +105,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv16f16(< define @nearbyint_nxv16f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -129,10 +129,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv32f16(< define @nearbyint_nxv32f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -273,10 +273,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f64(< define @nearbyint_nxv1f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -297,10 +297,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f64(< define @nearbyint_nxv2f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: 
vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -321,10 +321,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f64(< define @nearbyint_nxv4f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -345,10 +345,10 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f64(< define @nearbyint_nxv8f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll index 4ea3269cec0b1..61bb9a7658c38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -133,11 +133,11 @@ define @nearbyint_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -380,11 +380,11 @@ define @nearbyint_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll index 3d992aa13e379..a9fb8f227ff3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll @@ -14,9 +14,9 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -37,9 +37,9 @@ define @round_nxv2f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli 
zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -60,9 +60,9 @@ define @round_nxv4f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -83,9 +83,9 @@ define @round_nxv8f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -106,9 +106,9 @@ define @round_nxv16f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -129,9 +129,9 @@ define @round_nxv32f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -152,9 +152,9 @@ define @round_nxv1f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -175,9 +175,9 @@ define @round_nxv2f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -198,9 +198,9 @@ define @round_nxv4f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -221,9 +221,9 @@ define @round_nxv8f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -244,9 +244,9 @@ define @round_nxv16f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -267,9 +267,9 @@ define @round_nxv1f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -290,9 +290,9 @@ define @round_nxv2f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -313,9 +313,9 @@ define @round_nxv4f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -336,9 +336,9 @@ define @round_nxv8f64( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll index f7422b279149f..8de4506ffa1ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -20,11 +20,11 @@ define @round_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -43,11 +43,11 @@ define @round_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -66,11 +66,11 @@ define @round_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -89,11 +89,11 @@ define @round_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, 
e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -112,11 +112,11 @@ define @round_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -135,11 +135,11 @@ define @round_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -173,8 +173,8 @@ define @round_nxv1f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -187,11 +187,11 @@ define @round_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -212,8 +212,8 @@ define @round_nxv2f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -226,11 +226,11 @@ define @round_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -251,8 +251,8 @@ define @round_nxv4f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -265,11 +265,11 @@ 
define @round_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -290,8 +290,8 @@ define @round_nxv8f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -304,11 +304,11 @@ define @round_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -329,8 +329,8 @@ define @round_nxv16f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -343,11 +343,11 @@ define @round_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -368,8 +368,8 @@ define @round_nxv32f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -382,11 +382,11 @@ define @round_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -421,8 +421,8 @@ define @round_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -441,8 +441,8 @@ define @round_nxv2f32( %x) { ; 
CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -461,8 +461,8 @@ define @round_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -481,8 +481,8 @@ define @round_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -501,8 +501,8 @@ define @round_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -521,8 +521,8 @@ define @round_nxv1f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -541,8 +541,8 @@ define @round_nxv2f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -561,8 +561,8 @@ define @round_nxv4f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -581,8 +581,8 @@ define @round_nxv8f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll index c293ac91b63bf..d2c77de10dcb1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -14,9 +14,9 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -37,9 +37,9 @@ define @roundeven_nxv2f16( 
%x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -60,9 +60,9 @@ define @roundeven_nxv4f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -83,9 +83,9 @@ define @roundeven_nxv8f16( %x) strictfp { ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -106,9 +106,9 @@ define @roundeven_nxv16f16( %x) strictf ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -129,9 +129,9 @@ define @roundeven_nxv32f16( %x) strictf ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -152,9 +152,9 @@ define @roundeven_nxv1f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -175,9 +175,9 @@ define @roundeven_nxv2f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -198,9 +198,9 @@ define @roundeven_nxv4f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -221,9 +221,9 @@ define @roundeven_nxv8f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 
@@ -244,9 +244,9 @@ define @roundeven_nxv16f32( %x) stric ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -267,9 +267,9 @@ define @roundeven_nxv1f64( %x) strict ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -290,9 +290,9 @@ define @roundeven_nxv2f64( %x) strict ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -313,9 +313,9 @@ define @roundeven_nxv4f64( %x) strict ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -336,9 +336,9 @@ define @roundeven_nxv8f64( %x) strict ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll index 865531b77eb29..72f87134d963c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -19,11 +19,11 @@ define @roundeven_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -42,11 +42,11 @@ define @roundeven_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -65,11 +65,11 @@ define @roundeven_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, 
m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -88,11 +88,11 @@ define @roundeven_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -111,11 +111,11 @@ define @roundeven_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -134,11 +134,11 @@ define @roundeven_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -172,8 +172,8 @@ define @roundeven_nxv1f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -186,11 +186,11 @@ define @roundeven_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -211,8 +211,8 @@ define @roundeven_nxv2f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -225,11 +225,11 @@ define @roundeven_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -250,8 +250,8 @@ define 
@roundeven_nxv4f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -264,11 +264,11 @@ define @roundeven_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -289,8 +289,8 @@ define @roundeven_nxv8f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -303,11 +303,11 @@ define @roundeven_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -328,8 +328,8 @@ define @roundeven_nxv16f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -342,11 +342,11 @@ define @roundeven_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -367,8 +367,8 @@ define @roundeven_nxv32f16( %x) { ; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -381,11 +381,11 @@ define @roundeven_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -420,8 
+420,8 @@ define @roundeven_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -440,8 +440,8 @@ define @roundeven_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -460,8 +460,8 @@ define @roundeven_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,8 +480,8 @@ define @roundeven_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -500,8 +500,8 @@ define @roundeven_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -520,8 +520,8 @@ define @roundeven_nxv1f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -540,8 +540,8 @@ define @roundeven_nxv2f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -560,8 +560,8 @@ define @roundeven_nxv4f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -580,8 +580,8 @@ define @roundeven_nxv8f64( %x) { ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index c11373fd2e426..61228bd01384a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -9,9 +9,9 @@ define @fshr_v1i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: 
vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -26,9 +26,9 @@ define @fshl_v1i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -43,9 +43,9 @@ define @fshr_v2i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -60,9 +60,9 @@ define @fshl_v2i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -77,9 +77,9 @@ define @fshr_v4i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -94,9 +94,9 @@ define @fshl_v4i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -111,9 +111,9 @@ define @fshr_v8i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -128,9 +128,9 @@ define @fshl_v8i8( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v11, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -145,9 +145,9 @@ define @fshr_v16i8( %a, ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vand.vi v14, v14, 7, v0.t ; CHECK-NEXT: 
vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -162,9 +162,9 @@ define @fshl_v16i8( %a, ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vand.vi v14, v14, 7, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -179,9 +179,9 @@ define @fshr_v32i8( %a, ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v20, v16, v0.t +; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vand.vi v20, v20, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret @@ -196,9 +196,9 @@ define @fshl_v32i8( %a, ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t ; CHECK-NEXT: vnot.v v20, v16, v0.t +; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vand.vi v20, v20, 7, v0.t ; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vand.vi v16, v16, 7, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret @@ -281,9 +281,9 @@ define @fshr_v1i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -298,9 +298,9 @@ define @fshl_v1i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -315,9 +315,9 @@ define @fshr_v2i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -332,9 +332,9 @@ define @fshl_v2i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -349,9 +349,9 @@ define @fshr_v4i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv 
v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -366,9 +366,9 @@ define @fshl_v4i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vnot.v v11, v10, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v11, v11, 15, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -383,9 +383,9 @@ define @fshr_v8i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vand.vi v14, v14, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -400,9 +400,9 @@ define @fshl_v8i16( %a, ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t ; CHECK-NEXT: vnot.v v14, v12, v0.t +; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vand.vi v14, v14, 15, v0.t ; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vand.vi v12, v12, 15, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret @@ -417,9 +417,9 @@ define @fshr_v16i16( %a, @fshl_v16i16( %a, @fshr_v1i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv1i32( %a, %b, %c, %m, i32 %evl) @@ -536,12 +536,12 @@ define @fshl_v1i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv1i32( %a, %b, %c, %m, i32 %evl) @@ -554,12 +554,12 @@ define @fshr_v2i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv2i32( %a, %b, %c, %m, i32 %evl) @@ -572,12 +572,12 @@ define @fshl_v2i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: 
vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv2i32( %a, %b, %c, %m, i32 %evl) @@ -590,12 +590,12 @@ define @fshr_v4i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv4i32( %a, %b, %c, %m, i32 %evl) @@ -608,12 +608,12 @@ define @fshl_v4i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv4i32( %a, %b, %c, %m, i32 %evl) @@ -626,12 +626,12 @@ define @fshr_v8i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv8i32( %a, %b, %c, %m, i32 %evl) @@ -644,12 +644,12 @@ define @fshl_v8i32( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 31 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv8i32( %a, %b, %c, %m, i32 %evl) @@ -733,12 +733,12 @@ define @fshr_v1i64( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; 
CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv1i64( %a, %b, %c, %m, i32 %evl) @@ -751,12 +751,12 @@ define @fshl_v1i64( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vand.vx v11, v10, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t -; CHECK-NEXT: vnot.v v10, v10, v0.t -; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vnot.v v11, v10, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vx v11, v11, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv1i64( %a, %b, %c, %m, i32 %evl) @@ -769,12 +769,12 @@ define @fshr_v2i64( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv2i64( %a, %b, %c, %m, i32 %evl) @@ -787,12 +787,12 @@ define @fshl_v2i64( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vand.vx v14, v12, a1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t -; CHECK-NEXT: vnot.v v12, v12, v0.t -; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vnot.v v14, v12, v0.t ; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t -; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vand.vx v14, v14, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t ; CHECK-NEXT: vor.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv2i64( %a, %b, %c, %m, i32 %evl) @@ -805,12 +805,12 @@ define @fshr_v4i64( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshr.nxv4i64( %a, %b, %c, %m, i32 %evl) @@ -823,12 +823,12 @@ define @fshl_v4i64( %a, ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 63 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vand.vx v20, v16, a1, v0.t -; 
CHECK-NEXT: vsll.vv v8, v8, v20, v0.t -; CHECK-NEXT: vnot.v v16, v16, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vnot.v v20, v16, v0.t ; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t -; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v20, v20, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t ; CHECK-NEXT: vor.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv4i64( %a, %b, %c, %m, i32 %evl) @@ -1001,42 +1001,42 @@ define @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @fshr_v1i9( %a, %b, ; CHECK-NEXT: vand.vx v10, v10, a1, v0.t ; CHECK-NEXT: li a0, 9 ; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t ; CHECK-NEXT: vadd.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v10, 15, v0.t -; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vnot.v v10, v10, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t @@ -1203,11 +1210,11 @@ define @fshl_v1i9( %a, %b, ; CHECK-NEXT: vand.vx v10, v10, a1, v0.t ; CHECK-NEXT: li a0, 9 ; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t ; CHECK-NEXT: vand.vi v11, v10, 15, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vnot.v v10, v10, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t -; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t ; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t @@ -1225,11 +1232,11 @@ define @fshr_v1i4( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vi v9, v9, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vremu.vx v9, v10, a0, v0.t -; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v10, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret %trunca = call @llvm.vp.trunc.nxv1i4.nxv1i8( %a, %m, i32 zeroext %evl) @@ -1247,11 +1254,11 @@ define @fshl_v1i4( %a, %b, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t ; CHECK-NEXT: vand.vi v9, v9, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vremu.vx v9, v10, a0, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll index 8a5f118d8f6ac..736f532b00d42 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -7,10 +7,10 @@ define @trunc_nxv1f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -28,10 
+28,10 @@ declare @llvm.experimental.constrained.trunc.nxv1f16( @trunc_nxv2f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -49,10 +49,10 @@ declare @llvm.experimental.constrained.trunc.nxv2f16( @trunc_nxv4f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -70,10 +70,10 @@ declare @llvm.experimental.constrained.trunc.nxv4f16( @trunc_nxv8f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -91,10 +91,10 @@ declare @llvm.experimental.constrained.trunc.nxv8f16( @trunc_nxv16f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -112,10 +112,10 @@ declare @llvm.experimental.constrained.trunc.nxv16f16( @trunc_nxv32f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -137,8 +137,8 @@ define @trunc_nxv1f32( %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -158,8 +158,8 @@ define @trunc_nxv2f32( %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t @@ -179,8 +179,8 @@ define @trunc_nxv4f32( %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v10, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf 
v0, v10, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t @@ -200,8 +200,8 @@ define @trunc_nxv8f32( %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t @@ -221,8 +221,8 @@ define @trunc_nxv16f32( %x) strictfp ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vfabs.v v16, v8 +; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t @@ -238,10 +238,10 @@ declare @llvm.experimental.constrained.trunc.nxv16f32( @trunc_nxv1f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -259,10 +259,10 @@ declare @llvm.experimental.constrained.trunc.nxv1f64( @trunc_nxv2f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -280,10 +280,10 @@ declare @llvm.experimental.constrained.trunc.nxv2f64( @trunc_nxv4f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -301,10 +301,10 @@ declare @llvm.experimental.constrained.trunc.nxv4f64( @trunc_nxv8f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll index ae0542fb5b74f..9522a4daa4e6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/half-round-conv.ll @@ -335,8 +335,8 @@ define @ceil_nxv1f16_to_si64( %x) { ; CHECK-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -392,8 +392,8 @@ 
define @ceil_nxv1f16_to_ui64( %x) { ; CHECK-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -537,8 +537,8 @@ define @ceil_nxv4f16_to_si64( %x) { ; CHECK-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -594,8 +594,8 @@ define @ceil_nxv4f16_to_ui64( %x) { ; CHECK-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir index a967f86f5b930..2d49b4e4f493f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -18,7 +18,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = PseudoVMERGE_VIM_M1 undef renamable $v8, killed renamable $v2, 1, $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: renamable $v0 = COPY $v1, implicit $vtype + ; CHECK-NEXT: $v0 = COPY killed renamable $v1, implicit $vtype ; CHECK-NEXT: renamable $v9 = PseudoVMERGE_VIM_M1 undef renamable $v9, killed renamable $v3, 1, $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: renamable $v0 = PseudoVADD_VV_M1 undef renamable $v0, killed renamable $v8, killed renamable $v9, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET implicit $v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index 2757e140ecde5..c1ce0e40cc16d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1219,15 +1219,16 @@ declare @llvm.vector.insert.nxv8p0.nxv16p0( %ptrs0, %ptrs1, %m, %passthru0, %passthru1, ptr %out) { ; RV32-LABEL: mgather_nxv16i64: ; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v7, v0 ; RV32-NEXT: vl8re64.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: srli a2, a0, 3 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: vs8r.v v24, (a0) @@ -1236,20 +1237,37 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; ; RV64-LABEL: mgather_nxv16i64: ; RV64: # %bb.0: -; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV64-NEXT: vluxei64.v v24, (zero), v8, v0.t -; RV64-NEXT: vl8re64.v v8, (a1) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: srli a1, a0, 3 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: 
.cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: sub sp, sp, a3 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vmv1r.v v7, v0 +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vl8re64.v v24, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vl8re64.v v8, (a0) +; RV64-NEXT: srli a0, a1, 3 +; RV64-NEXT: vslidedown.vx v0, v0, a0 +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: vs8r.v v24, (a1) +; RV64-NEXT: vs8r.v v8, (a2) +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a2, a0 -; RV64-NEXT: vs8r.v v8, (a0) -; RV64-NEXT: vs8r.v v24, (a2) +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 16 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %p0 = call @llvm.vector.insert.nxv8p0.nxv16p0( undef, %ptrs0, i64 0) %p1 = call @llvm.vector.insert.nxv8p0.nxv16p0( %p0, %ptrs1, i64 8) @@ -2330,12 +2348,12 @@ define @mgather_baseidx_nxv32i8(ptr %base, ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v9 ; RV64-NEXT: srli a2, a1, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v9 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vsetvli a3, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index ef2085323823b..caae84dc31f94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1904,35 +1904,26 @@ define void @mscatter_nxv16f64( %val0, %val0, %val0, @vp_nearbyint_nxv32bf16( %va ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: frflags a4 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -307,12 +308,11 @@ define @vp_nearbyint_nxv32bf16( %va ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v5, v16, fa5, v0.t -; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: fsflags a2 +; CHECK-NEXT: fsflags a4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v6 @@ -353,11 +353,12 @@ define @vp_nearbyint_nxv32bf16_unmasked( @vp_nearbyint_nxv32bf16_unmasked( @llvm.vp.nearbyint.nxv8f16(, @vp_nearbyint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, 
%lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: frflags a0 @@ -753,11 +753,11 @@ declare @llvm.vp.nearbyint.nxv16f16(, < define @vp_nearbyint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: frflags a0 @@ -841,11 +841,11 @@ declare @llvm.vp.nearbyint.nxv32f16(, < define @vp_nearbyint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: frflags a0 @@ -864,6 +864,7 @@ define @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: frflags a4 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v6, v7 @@ -958,12 +959,11 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v6, v16, fa5, v0.t -; ZVFHMIN-NEXT: frflags a2 ; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t -; ZVFHMIN-NEXT: fsflags a2 +; ZVFHMIN-NEXT: fsflags a4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v7 @@ -1081,9 +1081,9 @@ define @vp_nearbyint_nxv4f32( %va, @vp_nearbyint_nxv8f32( %va, @vp_nearbyint_nxv16f32( %va, < ; CHECK-LABEL: vp_nearbyint_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t @@ -1254,11 +1254,11 @@ declare @llvm.vp.nearbyint.nxv2f64(, define @vp_nearbyint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -1298,11 +1298,11 @@ declare @llvm.vp.nearbyint.nxv4f64(, define @vp_nearbyint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -1342,11 +1342,11 @@ declare @llvm.vp.nearbyint.nxv7f64(, define @vp_nearbyint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -1386,11 +1386,11 @@ declare @llvm.vp.nearbyint.nxv8f64(, define @vp_nearbyint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 @@ -1438,21 +1438,21 @@ define @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: frflags a3 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: fsflags a2 +; CHECK-NEXT: fsflags a3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB44_2 @@ -1462,10 +1462,11 @@ define @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v6, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ 
-1487,13 +1488,13 @@ define @vp_nearbyint_nxv16f64_unmasked( %x, i64 %y) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a2, a1, 4 ; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv2r.v v0, v8 ; CHECK-NEXT: bltu a0, a2, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 @@ -20,20 +22,29 @@ define i1 @foo( %x, i64 %y) { ; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: addi a2, sp, 64 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 64 +; CHECK-NEXT: addi a3, sp, 64 +; CHECK-NEXT: vs2r.v v0, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vs8r.v v24, (a1) -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vs8r.v v16, (a1) +; CHECK-NEXT: addi a1, sp, 64 +; CHECK-NEXT: vl2r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vs8r.v v8, (a2) ; CHECK-NEXT: lbu a0, 0(a0) ; CHECK-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index a9505dca97529..f42913d5bf417 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -21,18 +21,16 @@ define @vp_rint_nxv1bf16( %va, @vp_rint_nxv2bf16( %va, @vp_rint_nxv4bf16( %va, @vp_rint_nxv8bf16( %va, @vp_rint_nxv16bf16( %va, @vp_rint_nxv32bf16( %va, @vp_rint_nxv32bf16( %va, @vp_rint_nxv32bf16_unmasked( ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 -; CHECK-NEXT: fmv.w.x fa5, a3 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: sub a4, a0, a1 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v7, v16, a2 -; CHECK-NEXT: sltu a2, a0, a3 -; CHECK-NEXT: vmv1r.v v6, v7 +; CHECK-NEXT: sltu a2, a0, a4 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: and a2, a2, a4 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: fmv.w.x fa5, a3 +; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v16, fa5, v0.t -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -398,18 +394,16 @@ define @vp_rint_nxv1f16( %va, @vp_rint_nxv2f16( %va, @vp_rint_nxv4f16( %va, @llvm.vp.rint.nxv8f16(, @vp_rint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; 
ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t ; ZVFH-NEXT: vmv1r.v v0, v10 @@ -634,10 +626,10 @@ define @vp_rint_nxv8f16( %va, @llvm.vp.rint.nxv16f16(, @vp_rint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t ; ZVFH-NEXT: vmv1r.v v0, v12 @@ -714,10 +706,10 @@ define @vp_rint_nxv16f16( %va, @llvm.vp.rint.nxv32f16(, @vp_rint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t ; ZVFH-NEXT: vmv1r.v v0, v16 @@ -796,21 +788,21 @@ define @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: lui a3, 307200 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 -; ZVFHMIN-NEXT: fmv.w.x fa5, a3 -; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: sub a4, a0, a1 +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 -; ZVFHMIN-NEXT: sltu a2, a0, a3 -; ZVFHMIN-NEXT: vmv1r.v v6, v7 +; ZVFHMIN-NEXT: sltu a2, a0, a4 ; ZVFHMIN-NEXT: addi a2, a2, -1 -; ZVFHMIN-NEXT: and a2, a2, a3 +; ZVFHMIN-NEXT: and a2, a2, a4 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t +; ZVFHMIN-NEXT: fmv.w.x fa5, a3 +; ZVFHMIN-NEXT: vmv1r.v v12, v7 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v6, v16, fa5, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmflt.vf v12, v16, fa5, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -993,10 +985,10 @@ define @vp_rint_nxv4f32( %va, @vp_rint_nxv8f32( %va, @vp_rint_nxv16f32( %va, @llvm.vp.rint.nxv2f64(, @vp_rint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 @@ -1190,11 +1182,11 @@ declare @llvm.vp.rint.nxv4f64(, @vp_rint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64: ; CHECK: # %bb.0: +; 
CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 @@ -1230,11 +1222,11 @@ declare @llvm.vp.rint.nxv7f64(, @vp_rint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -1270,11 +1262,11 @@ declare @llvm.vp.rint.nxv8f64(, @vp_rint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -1318,13 +1310,13 @@ define @vp_rint_nxv16f64( %va, @vp_rint_nxv16f64( %va, @vp_round_nxv1bf16( %va, @vp_round_nxv1bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -76,12 +76,12 @@ define @vp_round_nxv2bf16( %va, @vp_round_nxv2bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ define @vp_round_nxv4bf16( %va, @vp_round_nxv4bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -184,12 +184,12 @@ define @vp_round_nxv8bf16( %va, @vp_round_nxv8bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; 
CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -238,12 +238,12 @@ define @vp_round_nxv16bf16( %va, @vp_round_nxv16bf16_unmasked( @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16_unmasked( @vp_round_nxv32bf16_unmasked( @vp_round_nxv32bf16_unmasked( @vp_round_nxv1f16( %va, @vp_round_nxv1f16( %va, @vp_round_nxv1f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -471,11 +471,11 @@ define @vp_round_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -497,9 +497,9 @@ define @vp_round_nxv2f16( %va, @vp_round_nxv2f16( %va, @vp_round_nxv2f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -557,11 +557,11 @@ define @vp_round_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -583,9 +583,9 @@ define @vp_round_nxv4f16( %va, @vp_round_nxv4f16( %va, @vp_round_nxv4f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -643,11 +643,11 @@ define @vp_round_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -665,14 +665,14 @@ declare @llvm.vp.round.nxv8f16(, @vp_round_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli 
zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -689,12 +689,12 @@ define @vp_round_nxv8f16( %va, @vp_round_nxv8f16_unmasked( %va, i ; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -731,11 +731,11 @@ define @vp_round_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -753,14 +753,14 @@ declare @llvm.vp.round.nxv16f16(, @vp_round_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -777,12 +777,12 @@ define @vp_round_nxv16f16( %va, @vp_round_nxv16f16_unmasked( %va ; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -819,11 +819,11 @@ define @vp_round_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -841,14 +841,14 @@ declare @llvm.vp.round.nxv32f16(, @vp_round_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, 
zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -864,6 +864,7 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16_unmasked( %va ; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -941,11 +941,12 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 4 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v6, v7 @@ -958,11 +959,10 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v6, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -975,10 +975,10 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1000,9 +1000,9 @@ define @vp_round_nxv1f32( %va, @vp_round_nxv1f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1042,9 +1042,9 @@ define @vp_round_nxv2f32( %va, @vp_round_nxv2f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1081,13 +1081,13 @@ define @vp_round_nxv4f32( %va, @vp_round_nxv4f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1125,13 +1125,13 @@ define @vp_round_nxv8f32( %va, @vp_round_nxv8f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi 
a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1169,13 +1169,13 @@ define @vp_round_nxv16f32( %va, @vp_round_nxv16f32_unmasked( % ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1216,9 +1216,9 @@ define @vp_round_nxv1f64( %va, @vp_round_nxv1f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1254,14 +1254,14 @@ declare @llvm.vp.round.nxv2f64(, @vp_round_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1281,8 +1281,8 @@ define @vp_round_nxv2f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1298,14 +1298,14 @@ declare @llvm.vp.round.nxv4f64(, @vp_round_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1325,8 +1325,8 @@ define @vp_round_nxv4f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1342,14 +1342,14 @@ declare @llvm.vp.round.nxv7f64(, @vp_round_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; 
CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1369,8 +1369,8 @@ define @vp_round_nxv7f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1386,14 +1386,14 @@ declare @llvm.vp.round.nxv8f64(, @vp_round_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1413,8 +1413,8 @@ define @vp_round_nxv8f64_unmasked( %v ; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1438,20 +1438,20 @@ define @vp_round_nxv16f64( %va, @vp_round_nxv16f64( %va, @vp_round_nxv16f64_unmasked( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: fsrmi a3, 4 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1502,8 +1503,8 @@ define @vp_round_nxv16f64_unmasked( ; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 3975423e6f985..ebd9221366472 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -22,12 +22,12 @@ define @vp_roundeven_nxv1bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, 
mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -49,11 +49,11 @@ define @vp_roundeven_nxv1bf16_unmasked( @vp_roundeven_nxv2bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -103,11 +103,11 @@ define @vp_roundeven_nxv2bf16_unmasked( @vp_roundeven_nxv4bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -157,11 +157,11 @@ define @vp_roundeven_nxv4bf16_unmasked( @vp_roundeven_nxv8bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -211,11 +211,11 @@ define @vp_roundeven_nxv8bf16_unmasked( @vp_roundeven_nxv16bf16( %va ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -265,11 +265,11 @@ define @vp_roundeven_nxv16bf16_unmasked( @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: fsrmi a4, 0 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -307,11 +308,10 @@ define @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v5, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a4 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -326,11 +326,11 @@ define @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, 
ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -353,11 +353,12 @@ define @vp_roundeven_nxv32bf16_unmasked( @vp_roundeven_nxv32bf16_unmasked( @vp_roundeven_nxv32bf16_unmasked( @vp_roundeven_nxv1f16( %va, @vp_roundeven_nxv1f16( %va, @vp_roundeven_nxv1f16_unmasked( %v ; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -471,11 +471,11 @@ define @vp_roundeven_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -497,9 +497,9 @@ define @vp_roundeven_nxv2f16( %va, @vp_roundeven_nxv2f16( %va, @vp_roundeven_nxv2f16_unmasked( %v ; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -557,11 +557,11 @@ define @vp_roundeven_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -583,9 +583,9 @@ define @vp_roundeven_nxv4f16( %va, @vp_roundeven_nxv4f16( %va, @vp_roundeven_nxv4f16_unmasked( %v ; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -643,11 +643,11 @@ define @vp_roundeven_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -665,14 +665,14 @@ declare @llvm.vp.roundeven.nxv8f16(, @vp_roundeven_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, 
ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -689,12 +689,12 @@ define @vp_roundeven_nxv8f16( %va, @vp_roundeven_nxv8f16_unmasked( %v ; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -731,11 +731,11 @@ define @vp_roundeven_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -753,14 +753,14 @@ declare @llvm.vp.roundeven.nxv16f16(, < define @vp_roundeven_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -777,12 +777,12 @@ define @vp_roundeven_nxv16f16( %va, @vp_roundeven_nxv16f16_unmasked( ; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -819,11 +819,11 @@ define @vp_roundeven_nxv16f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -841,14 +841,14 @@ declare @llvm.vp.roundeven.nxv32f16(, < define @vp_roundeven_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: fsrmi a0, 0 ; 
ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -864,6 +864,7 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16_unmasked( ; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -941,11 +941,12 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 0 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v6, v7 @@ -958,11 +959,10 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v6, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -975,10 +975,10 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: .LBB23_2: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1000,9 +1000,9 @@ define @vp_roundeven_nxv1f32( %va, @vp_roundeven_nxv1f32_unmasked( ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1042,9 +1042,9 @@ define @vp_roundeven_nxv2f32( %va, @vp_roundeven_nxv2f32_unmasked( ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1081,13 +1081,13 @@ define @vp_roundeven_nxv4f32( %va, @vp_roundeven_nxv4f32_unmasked( ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1125,13 +1125,13 @@ define @vp_roundeven_nxv8f32( %va, @vp_roundeven_nxv8f32_unmasked( ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; 
CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1169,13 +1169,13 @@ define @vp_roundeven_nxv16f32( %va, < ; CHECK-LABEL: vp_roundeven_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1195,8 +1195,8 @@ define @vp_roundeven_nxv16f32_unmasked( @vp_roundeven_nxv1f64( %va, @vp_roundeven_nxv1f64_unmasked( @llvm.vp.roundeven.nxv2f64(, define @vp_roundeven_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1281,8 +1281,8 @@ define @vp_roundeven_nxv2f64_unmasked( @llvm.vp.roundeven.nxv4f64(, define @vp_roundeven_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1325,8 +1325,8 @@ define @vp_roundeven_nxv4f64_unmasked( @llvm.vp.roundeven.nxv7f64(, define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1369,8 +1369,8 @@ define @vp_roundeven_nxv7f64_unmasked( @llvm.vp.roundeven.nxv8f64(, define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; 
CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1413,8 +1413,8 @@ define @vp_roundeven_nxv8f64_unmasked( @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: fsrmi a3, 0 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1462,10 +1462,11 @@ define @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v6, v7 ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1487,12 +1488,12 @@ define @vp_roundeven_nxv16f64_unmasked( @vp_roundeven_nxv16f64_unmasked( @vp_roundtozero_nxv1bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -49,11 +49,11 @@ define @vp_roundtozero_nxv1bf16_unmasked( @vp_roundtozero_nxv2bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -103,11 +103,11 @@ define @vp_roundtozero_nxv2bf16_unmasked( @vp_roundtozero_nxv4bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t 
-; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -157,11 +157,11 @@ define @vp_roundtozero_nxv4bf16_unmasked( @vp_roundtozero_nxv8bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -211,11 +211,11 @@ define @vp_roundtozero_nxv8bf16_unmasked( @vp_roundtozero_nxv16bf16( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -265,11 +265,11 @@ define @vp_roundtozero_nxv16bf16_unmasked( @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: fsrmi a4, 1 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -307,11 +308,10 @@ define @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v5, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a4 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -326,11 +326,11 @@ define @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -353,11 +353,12 @@ define @vp_roundtozero_nxv32bf16_unmasked( @vp_roundtozero_nxv32bf16_unmasked( @vp_roundtozero_nxv32bf16_unmasked( @vp_roundtozero_nxv1f16( %va, @vp_roundtozero_nxv1f16( %va, @vp_roundtozero_nxv1f16_unmasked( ; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -471,11 +471,11 @@ define @vp_roundtozero_nxv1f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 
307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -497,9 +497,9 @@ define @vp_roundtozero_nxv2f16( %va, @vp_roundtozero_nxv2f16( %va, @vp_roundtozero_nxv2f16_unmasked( ; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -557,11 +557,11 @@ define @vp_roundtozero_nxv2f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -583,9 +583,9 @@ define @vp_roundtozero_nxv4f16( %va, @vp_roundtozero_nxv4f16( %va, @vp_roundtozero_nxv4f16_unmasked( ; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -643,11 +643,11 @@ define @vp_roundtozero_nxv4f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -665,14 +665,14 @@ declare @llvm.vp.roundtozero.nxv8f16(, @vp_roundtozero_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v10, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI18_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0) ; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -689,12 +689,12 @@ define @vp_roundtozero_nxv8f16( %va, @vp_roundtozero_nxv8f16_unmasked( ; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -731,11 +731,11 @@ define @vp_roundtozero_nxv8f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -753,14 +753,14 @@ declare @llvm.vp.roundtozero.nxv16f16(, define @vp_roundtozero_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v12, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) ; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -777,12 +777,12 @@ define @vp_roundtozero_nxv16f16( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -805,8 +805,8 @@ define @vp_roundtozero_nxv16f16_unmasked( @vp_roundtozero_nxv16f16_unmasked( @llvm.vp.roundtozero.nxv32f16(, define @vp_roundtozero_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v16, v0 -; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -864,6 +864,7 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 1 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 @@ -880,11 +881,10 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v5, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t @@ -899,11 +899,11 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vmv1r.v 
v8, v7 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -926,8 +926,8 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv1f32( %va, @vp_roundtozero_nxv1f32_unmasked( @vp_roundtozero_nxv2f32( %va, @vp_roundtozero_nxv2f32_unmasked( @vp_roundtozero_nxv4f32( %va, @vp_roundtozero_nxv4f32_unmasked( @vp_roundtozero_nxv8f32( %va, @vp_roundtozero_nxv8f32_unmasked( @vp_roundtozero_nxv16f32( %va, ; CHECK-LABEL: vp_roundtozero_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1195,8 +1195,8 @@ define @vp_roundtozero_nxv16f32_unmasked( @vp_roundtozero_nxv1f64( %va, ; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1237,8 +1237,8 @@ define @vp_roundtozero_nxv1f64_unmasked( @llvm.vp.roundtozero.nxv2f64( define @vp_roundtozero_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI36_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1281,8 +1281,8 @@ define @vp_roundtozero_nxv2f64_unmasked( @llvm.vp.roundtozero.nxv4f64( define @vp_roundtozero_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI38_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1325,8 +1325,8 @@ define @vp_roundtozero_nxv4f64_unmasked( @llvm.vp.roundtozero.nxv7f64( define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext 
%evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI40_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1369,8 +1369,8 @@ define @vp_roundtozero_nxv7f64_unmasked( @llvm.vp.roundtozero.nxv8f64( define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI42_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1413,8 +1413,8 @@ define @vp_roundtozero_nxv8f64_unmasked( @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1462,10 +1462,11 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v6, v7 ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1487,12 +1488,12 @@ define @vp_roundtozero_nxv16f64_unmasked( @vp_roundtozero_nxv16f64_unmasked( @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, 
[The remainder of this patch is regenerated FileCheck output for the RVV codegen tests. The hunks in this span appear to only reorder generated instructions and shift vector register assignments (for example, vmv1r.v copies into v0 are now scheduled next to their masked uses, vsetvli/vslidedown.vx pairs move relative to scalar address arithmetic, the vmul.vv/vmulhu.vv results in the umulo tests swap destination registers, and vmand.mm results are written directly to v0 and then copied out); the test IR itself is unchanged. The span begins with the tail of the preceding hunks (@fcmp_oeq_vv_nxv64f16, @icmp_eq_vv_nxv32i32, @strided_load_nxv16f64, @strided_load_nxv17f64, @strided_store_nxv17f64) and then updates:

  llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll                (index 68e0c0089d0c7..a5dd27149c1f2)
  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll   (index 20e68b13bae10..58880453cdbf8)
  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll    (index 745c2cd72a6f8..8a64c828a181c)
  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll         (index c3a51986a7f8c..f8044fde7fea2)
  vector interleave and vector splice tests (@vector_interleave_* and @splice_* functions, CHECK and ZVBB run lines)
  llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll    (index ec6ab422d6405..1c9f42ce9b208)

The fcmp_* hunks for vfcmp-constrained-sdnode.ll continue below.]
vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4659,8 +4659,8 @@ define @fcmp_oge_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4675,8 +4675,8 @@ define @fcmp_oge_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4704,8 +4704,8 @@ define @fcmp_olt_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4720,8 +4720,8 @@ define @fcmp_olt_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4749,8 +4749,8 @@ define @fcmp_ole_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4765,8 +4765,8 @@ define @fcmp_ole_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -4945,8 +4945,8 @@ define @fcmp_ugt_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4962,8 +4962,8 @@ define @fcmp_ugt_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -4993,8 +4993,8 @@ define @fcmp_uge_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5010,8 +5010,8 @@ define 
@fcmp_uge_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5041,8 +5041,8 @@ define @fcmp_ult_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5058,8 +5058,8 @@ define @fcmp_ult_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5089,8 +5089,8 @@ define @fcmp_ule_vf_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5106,8 +5106,8 @@ define @fcmp_ule_fv_nxv2f32( %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -5235,8 +5235,8 @@ define @fcmp_ogt_vv_nxv4f32( %va, @fcmp_ogt_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5269,8 +5269,8 @@ define @fcmp_ogt_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5286,8 +5286,8 @@ define @fcmp_oge_vv_nxv4f32( %va, @fcmp_oge_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5320,8 +5320,8 @@ define @fcmp_oge_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5337,8 +5337,8 @@ define @fcmp_olt_vv_nxv4f32( %va, 
@fcmp_olt_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5371,8 +5371,8 @@ define @fcmp_olt_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5388,8 +5388,8 @@ define @fcmp_ole_vv_nxv4f32( %va, @fcmp_ole_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5422,8 +5422,8 @@ define @fcmp_ole_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -5439,12 +5439,12 @@ define @fcmp_one_vv_nxv4f32( %va, @llvm.experimental.constrained.fcmp.nxv4f32( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -5457,12 +5457,12 @@ define @fcmp_one_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -5477,12 +5477,12 @@ define @fcmp_one_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -5538,12 +5538,12 @@ define @fcmp_ueq_vv_nxv4f32( %va, @llvm.experimental.constrained.fcmp.nxv4f32( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -5556,12 +5556,12 @@ define @fcmp_ueq_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf 
v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -5576,12 +5576,12 @@ define @fcmp_ueq_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -5595,8 +5595,8 @@ define @fcmp_ugt_vv_nxv4f32( %va, @fcmp_ugt_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5629,8 +5629,8 @@ define @fcmp_ugt_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5646,8 +5646,8 @@ define @fcmp_uge_vv_nxv4f32( %va, @fcmp_uge_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5680,8 +5680,8 @@ define @fcmp_uge_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5697,8 +5697,8 @@ define @fcmp_ult_vv_nxv4f32( %va, @fcmp_ult_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5731,8 +5731,8 @@ define @fcmp_ult_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; 
CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5748,8 +5748,8 @@ define @fcmp_ule_vv_nxv4f32( %va, @fcmp_ule_vf_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5782,8 +5782,8 @@ define @fcmp_ule_fv_nxv4f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -5910,8 +5910,8 @@ define @fcmp_ogt_vv_nxv8f32( %va, @fcmp_ogt_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -5944,8 +5944,8 @@ define @fcmp_ogt_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -5961,8 +5961,8 @@ define @fcmp_oge_vv_nxv8f32( %va, @fcmp_oge_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -5995,8 +5995,8 @@ define @fcmp_oge_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6012,8 +6012,8 @@ define @fcmp_olt_vv_nxv8f32( %va, @fcmp_olt_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6046,8 +6046,8 @@ define @fcmp_olt_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6063,8 +6063,8 @@ define @fcmp_ole_vv_nxv8f32( 
%va, @fcmp_ole_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6097,8 +6097,8 @@ define @fcmp_ole_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -6114,12 +6114,12 @@ define @fcmp_one_vv_nxv8f32( %va, @llvm.experimental.constrained.fcmp.nxv8f32( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -6132,12 +6132,12 @@ define @fcmp_one_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6152,12 +6152,12 @@ define @fcmp_one_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6213,12 +6213,12 @@ define @fcmp_ueq_vv_nxv8f32( %va, @llvm.experimental.constrained.fcmp.nxv8f32( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -6231,12 +6231,12 @@ define @fcmp_ueq_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6251,12 +6251,12 @@ define @fcmp_ueq_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v 
v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6270,8 +6270,8 @@ define @fcmp_ugt_vv_nxv8f32( %va, @fcmp_ugt_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6304,8 +6304,8 @@ define @fcmp_ugt_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6321,8 +6321,8 @@ define @fcmp_uge_vv_nxv8f32( %va, @fcmp_uge_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6355,8 +6355,8 @@ define @fcmp_uge_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6372,8 +6372,8 @@ define @fcmp_ult_vv_nxv8f32( %va, @fcmp_ult_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6406,8 +6406,8 @@ define @fcmp_ult_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6423,8 +6423,8 @@ define @fcmp_ule_vv_nxv8f32( %va, @fcmp_ule_vf_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6457,8 +6457,8 @@ define @fcmp_ule_fv_nxv8f32( %va, float %b ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; 
CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -6585,8 +6585,8 @@ define @fcmp_ogt_vv_nxv16f32( %va, @fcmp_ogt_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6619,8 +6619,8 @@ define @fcmp_ogt_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6636,8 +6636,8 @@ define @fcmp_oge_vv_nxv16f32( %va, @fcmp_oge_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6670,8 +6670,8 @@ define @fcmp_oge_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6687,8 +6687,8 @@ define @fcmp_olt_vv_nxv16f32( %va, @fcmp_olt_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6721,8 +6721,8 @@ define @fcmp_olt_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6738,8 +6738,8 @@ define @fcmp_ole_vv_nxv16f32( %va, @fcmp_ole_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6772,8 +6772,8 @@ define @fcmp_ole_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -6789,12 +6789,12 @@ define @fcmp_one_vv_nxv16f32( %va, 
@llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -6807,12 +6807,12 @@ define @fcmp_one_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6827,12 +6827,12 @@ define @fcmp_one_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6888,12 +6888,12 @@ define @fcmp_ueq_vv_nxv16f32( %va, @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -6906,12 +6906,12 @@ define @fcmp_ueq_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6926,12 +6926,12 @@ define @fcmp_ueq_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6945,8 +6945,8 @@ define @fcmp_ugt_vv_nxv16f32( %va, @fcmp_ugt_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 
; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -6979,8 +6979,8 @@ define @fcmp_ugt_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -6996,8 +6996,8 @@ define @fcmp_uge_vv_nxv16f32( %va, @fcmp_uge_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7030,8 +7030,8 @@ define @fcmp_uge_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7047,8 +7047,8 @@ define @fcmp_ult_vv_nxv16f32( %va, @fcmp_ult_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7081,8 +7081,8 @@ define @fcmp_ult_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7098,8 +7098,8 @@ define @fcmp_ule_vv_nxv16f32( %va, @fcmp_ule_vf_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7132,8 +7132,8 @@ define @fcmp_ule_fv_nxv16f32( %va, float ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -7272,8 +7272,8 @@ define @fcmp_ogt_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7288,8 +7288,8 @@ define @fcmp_ogt_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, 
v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7317,8 +7317,8 @@ define @fcmp_oge_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7333,8 +7333,8 @@ define @fcmp_oge_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7362,8 +7362,8 @@ define @fcmp_olt_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7378,8 +7378,8 @@ define @fcmp_olt_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7407,8 +7407,8 @@ define @fcmp_ole_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7423,8 +7423,8 @@ define @fcmp_ole_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret @@ -7603,8 +7603,8 @@ define @fcmp_ugt_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7620,8 +7620,8 @@ define @fcmp_ugt_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7651,8 +7651,8 @@ define @fcmp_uge_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7668,8 +7668,8 @@ define @fcmp_uge_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; 
CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7699,8 +7699,8 @@ define @fcmp_ult_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7716,8 +7716,8 @@ define @fcmp_ult_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7747,8 +7747,8 @@ define @fcmp_ule_vf_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v9, v10 ; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7764,8 +7764,8 @@ define @fcmp_ule_fv_nxv1f64( %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmfeq.vf v9, v9, fa0 +; CHECK-NEXT: vmfeq.vv v10, v8, v8 ; CHECK-NEXT: vmand.mm v0, v10, v9 ; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v0 @@ -7893,8 +7893,8 @@ define @fcmp_ogt_vv_nxv2f64( %va, @fcmp_ogt_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -7927,8 +7927,8 @@ define @fcmp_ogt_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -7944,8 +7944,8 @@ define @fcmp_oge_vv_nxv2f64( %va, @fcmp_oge_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -7978,8 +7978,8 @@ define @fcmp_oge_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -7995,8 +7995,8 @@ define @fcmp_olt_vv_nxv2f64( %va, @fcmp_olt_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, 
v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8029,8 +8029,8 @@ define @fcmp_olt_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8046,8 +8046,8 @@ define @fcmp_ole_vv_nxv2f64( %va, @fcmp_ole_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8080,8 +8080,8 @@ define @fcmp_ole_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret @@ -8097,12 +8097,12 @@ define @fcmp_one_vv_nxv2f64( %va, @llvm.experimental.constrained.fcmp.nxv2f64( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -8115,12 +8115,12 @@ define @fcmp_one_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8135,12 +8135,12 @@ define @fcmp_one_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8196,12 +8196,12 @@ define @fcmp_ueq_vv_nxv2f64( %va, @llvm.experimental.constrained.fcmp.nxv2f64( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -8214,12 +8214,12 @@ define @fcmp_ueq_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v12, v13 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf 
v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v12, v13 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8234,12 +8234,12 @@ define @fcmp_ueq_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 ; CHECK-NEXT: vmfeq.vf v13, v10, fa0 -; CHECK-NEXT: vmand.mm v10, v13, v12 -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v10, v11 +; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v11, v10 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8253,8 +8253,8 @@ define @fcmp_ugt_vv_nxv2f64( %va, @fcmp_ugt_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8287,8 +8287,8 @@ define @fcmp_ugt_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8304,8 +8304,8 @@ define @fcmp_uge_vv_nxv2f64( %va, @fcmp_uge_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8338,8 +8338,8 @@ define @fcmp_uge_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8355,8 +8355,8 @@ define @fcmp_ult_vv_nxv2f64( %va, @fcmp_ult_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8389,8 +8389,8 @@ define @fcmp_ult_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, 
v10 ; CHECK-NEXT: ret @@ -8406,8 +8406,8 @@ define @fcmp_ule_vv_nxv2f64( %va, @fcmp_ule_vf_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8440,8 +8440,8 @@ define @fcmp_ule_fv_nxv2f64( %va, double ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vmfeq.vf v12, v10, fa0 ; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v10, v10, v12 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v10 ; CHECK-NEXT: ret @@ -8568,8 +8568,8 @@ define @fcmp_ogt_vv_nxv4f64( %va, @fcmp_ogt_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8602,8 +8602,8 @@ define @fcmp_ogt_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8619,8 +8619,8 @@ define @fcmp_oge_vv_nxv4f64( %va, @fcmp_oge_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8653,8 +8653,8 @@ define @fcmp_oge_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8670,8 +8670,8 @@ define @fcmp_olt_vv_nxv4f64( %va, @fcmp_olt_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8704,8 +8704,8 @@ define @fcmp_olt_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8721,8 +8721,8 @@ define @fcmp_ole_vv_nxv4f64( %va, @fcmp_ole_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; 
CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8755,8 +8755,8 @@ define @fcmp_ole_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -8772,12 +8772,12 @@ define @fcmp_one_vv_nxv4f64( %va, @llvm.experimental.constrained.fcmp.nxv4f64( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -8790,12 +8790,12 @@ define @fcmp_one_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8810,12 +8810,12 @@ define @fcmp_one_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8871,12 +8871,12 @@ define @fcmp_ueq_vv_nxv4f64( %va, @llvm.experimental.constrained.fcmp.nxv4f64( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -8889,12 +8889,12 @@ define @fcmp_ueq_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8909,12 +8909,12 @@ define @fcmp_ueq_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v12 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v12, v13 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, 
v0 +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v13, v12 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -8928,8 +8928,8 @@ define @fcmp_ugt_vv_nxv4f64( %va, @fcmp_ugt_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -8962,8 +8962,8 @@ define @fcmp_ugt_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -8979,8 +8979,8 @@ define @fcmp_uge_vv_nxv4f64( %va, @fcmp_uge_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9013,8 +9013,8 @@ define @fcmp_uge_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9030,8 +9030,8 @@ define @fcmp_ult_vv_nxv4f64( %va, @fcmp_ult_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9064,8 +9064,8 @@ define @fcmp_ult_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9081,8 +9081,8 @@ define @fcmp_ule_vv_nxv4f64( %va, @fcmp_ule_vf_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v16, v12 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9115,8 +9115,8 @@ define @fcmp_ule_fv_nxv4f64( %va, double ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vmfeq.vf v16, v12, fa0 ; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v12, v12, v16 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v12 ; CHECK-NEXT: ret @@ -9243,8 +9243,8 
@@ define @fcmp_ogt_vv_nxv8f64( %va, @fcmp_ogt_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9277,8 +9277,8 @@ define @fcmp_ogt_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9294,8 +9294,8 @@ define @fcmp_oge_vv_nxv8f64( %va, @fcmp_oge_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9328,8 +9328,8 @@ define @fcmp_oge_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9345,8 +9345,8 @@ define @fcmp_olt_vv_nxv8f64( %va, @fcmp_olt_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9379,8 +9379,8 @@ define @fcmp_olt_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9396,8 +9396,8 @@ define @fcmp_ole_vv_nxv8f64( %va, @fcmp_ole_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9430,8 +9430,8 @@ define @fcmp_ole_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -9447,12 +9447,12 @@ define @fcmp_one_vv_nxv8f64( %va, @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -9465,12 +9465,12 @@ define @fcmp_one_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; 
CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9485,12 +9485,12 @@ define @fcmp_one_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9546,12 +9546,12 @@ define @fcmp_ueq_vv_nxv8f64( %va, @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -9564,12 +9564,12 @@ define @fcmp_ueq_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9584,12 +9584,12 @@ define @fcmp_ueq_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v16 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v16, v17 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v17, v0 +; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v17, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9603,8 +9603,8 @@ define @fcmp_ugt_vv_nxv8f64( %va, @fcmp_ugt_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9637,8 +9637,8 @@ define @fcmp_ugt_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, 
v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9654,8 +9654,8 @@ define @fcmp_uge_vv_nxv8f64( %va, @fcmp_uge_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9688,8 +9688,8 @@ define @fcmp_uge_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9705,8 +9705,8 @@ define @fcmp_ult_vv_nxv8f64( %va, @fcmp_ult_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9739,8 +9739,8 @@ define @fcmp_ult_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9756,8 +9756,8 @@ define @fcmp_ule_vv_nxv8f64( %va, @fcmp_ule_vf_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret @@ -9790,8 +9790,8 @@ define @fcmp_ule_fv_nxv8f64( %va, double ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vmfeq.vf v24, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index a2869999094a5..6a63f2e3a52d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -818,51 +818,50 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v3, v0 -; CHECK-NEXT: fmv.x.h a2, fa0 +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: vmv.v.x v24, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a3 
+; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a1, a3, 1 -; CHECK-NEXT: srli a3, a3, 2 -; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v0, a3 -; CHECK-NEXT: sltu a3, a0, a4 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 4 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t -; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28, v0.t ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -1064,52 +1063,50 @@ define @vfma_vf_nxv32bf16_unmasked( ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: fmv.x.h a2, fa0 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: slli a1, a3, 1 -; CHECK-NEXT: srli a3, a3, 2 -; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, a3 -; CHECK-NEXT: sltu a3, a0, a4 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 4 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v0, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: 
mv a3, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: slli a1, a2, 1 +; CHECK-NEXT: srli a2, a2, 2 +; CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a2 +; CHECK-NEXT: sltu a2, a0, a3 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20, v0.t -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t -; CHECK-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; CHECK-NEXT: vmv.v.x v8, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28, v0.t ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -2285,51 +2282,50 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v3, v0 -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a3, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 +; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: addi a2, a2, -1 +; ZVFHMIN-NEXT: and a2, a2, a3 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a1, a3, 1 -; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 -; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v 
v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -2543,52 +2539,50 @@ define @vfma_vf_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: slli a1, a3, 1 -; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 -; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v0, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a3, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a3 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: slli a1, a2, 1 +; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: sub a3, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 +; ZVFHMIN-NEXT: sltu a2, a0, a3 +; ZVFHMIN-NEXT: addi a2, a2, -1 +; ZVFHMIN-NEXT: and a2, a2, a3 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 4 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # 
Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -3487,8 +3481,8 @@ define @vfma_vv_nxv16f64( %va, @vfmsub_vv_nxv1f16( %va, @vfmsub_vf_nxv1f16( %va, half %b, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret @@ -3738,14 +3732,14 @@ define @vfmsub_vf_nxv1f16_commute( %va, h ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v11, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4058,8 +4052,8 @@ define @vfnmadd_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t @@ -4089,8 +4083,8 @@ define @vfnmadd_vf_nxv1f16_neg_splat_commute( 
@vfnmsub_vf_nxv1f16( %va, half %b, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4320,14 +4314,14 @@ define @vfnmsub_vf_nxv1f16_commute( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4407,14 +4401,14 @@ define @vfnmsub_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v10, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4436,12 +4430,12 @@ define @vfnmsub_vf_nxv1f16_neg_splat_commute( @vfmsub_vv_nxv2f16( %va, @vfmsub_vf_nxv2f16( %va, half %b, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret @@ -4605,14 +4599,14 @@ define @vfmsub_vf_nxv2f16_commute( %va, h ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, 
v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v9, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v11, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -4925,8 +4919,8 @@ define @vfnmadd_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t @@ -4956,8 +4950,8 @@ define @vfnmadd_vf_nxv2f16_neg_splat_commute( @vfnmsub_vf_nxv2f16( %va, half %b, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5187,14 +5181,14 @@ define @vfnmsub_vf_nxv2f16_commute( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5274,14 +5268,14 @@ define @vfnmsub_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v11, v9, v10, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v10, v9, v11, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v11, v0.t +; 
ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5303,12 +5297,12 @@ define @vfnmsub_vf_nxv2f16_neg_splat_commute( @vfmsub_vv_nxv4f16( %va, @vfmsub_vf_nxv4f16( %va, half %b, ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: vmv.v.x v14, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v12, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5472,14 +5466,14 @@ define @vfmsub_vf_nxv4f16_commute( %va, h ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v14, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v12, v9, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v10, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -5792,8 +5786,8 @@ define @vfnmadd_vf_nxv4f16_neg_splat( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v14, v10, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v14, v10, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t @@ -5823,8 +5817,8 @@ define @vfnmadd_vf_nxv4f16_neg_splat_commute( @vfnmsub_vf_nxv4f16( %va, half %b, ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a1 +; ZVFHMIN-NEXT: vmv.v.x v14, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vxor.vx v12, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v14, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; 
ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6054,14 +6048,14 @@ define @vfnmsub_vf_nxv4f16_commute( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v14, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v10, v8, v12, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6139,16 +6133,16 @@ define @vfnmsub_vf_nxv4f16_neg_splat( %va ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v10, a1 +; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v12, v10, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t +; ZVFHMIN-NEXT: vxor.vx v9, v12, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v10, v14, v12, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6168,14 +6162,14 @@ define @vfnmsub_vf_nxv4f16_neg_splat_commute( @vfmsub_vv_nxv8f16( %va, @vfmsub_vf_nxv8f16( %va, half %b, ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vmv.v.x v20, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6339,14 +6333,14 @@ define @vfmsub_vf_nxv8f16_commute( %va, h ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v20, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v10, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, 
ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v12, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6659,8 +6653,8 @@ define @vfnmadd_vf_nxv8f16_neg_splat( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t @@ -6690,8 +6684,8 @@ define @vfnmadd_vf_nxv8f16_neg_splat_commute( @vfnmsub_vf_nxv8f16( %va, half %b, ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vmv.v.x v20, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -6921,14 +6915,14 @@ define @vfnmsub_vf_nxv8f16_commute( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v20, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7006,16 +7000,16 @@ define @vfnmsub_vf_nxv8f16_neg_splat( %va ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v12, a1 +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v16, v12, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v16, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t +; ZVFHMIN-NEXT: vxor.vx v10, v16, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v12, v20, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = 
insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7035,14 +7029,14 @@ define @vfnmsub_vf_nxv8f16_neg_splat_commute( @vfmsub_vv_nxv16f16( %va, @vfmsub_vf_nxv16f16( %va, half % ; ; ZVFHMIN-LABEL: vfmsub_vf_nxv16f16: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv4r.v v16, v8 -; ZVFHMIN-NEXT: fmv.x.h a0, fa0 -; ZVFHMIN-NEXT: vmv.v.x v4, a0 +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v20, v12, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7209,14 +7201,14 @@ define @vfmsub_vf_nxv16f16_commute( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v12, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7532,8 +7524,8 @@ define @vfnmadd_vf_nxv16f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a0, fa0 ; ZVFHMIN-NEXT: vmv.v.x v8, a0 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t ; ZVFHMIN-NEXT: vxor.vx v12, v12, a0, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t @@ -7564,10 +7556,10 @@ define @vfnmadd_vf_nxv16f16_neg_splat_commute( @vfnmsub_vf_nxv16f16( %va, half ; ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv4r.v v16, v12 -; ZVFHMIN-NEXT: fmv.x.h a0, fa0 -; ZVFHMIN-NEXT: vmv.v.x v4, a0 +; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: lui a0, 8 -; ZVFHMIN-NEXT: vxor.vx v20, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v8, a0, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, 
e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7798,14 +7789,14 @@ define @vfnmsub_vf_nxv16f16_commute( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v4, a1 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7886,14 +7877,14 @@ define @vfnmsub_vf_nxv16f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a0, fa0 ; ZVFHMIN-NEXT: vmv.v.x v8, a0 ; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -7915,14 +7906,14 @@ define @vfnmsub_vf_nxv16f16_neg_splat_commute( @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 -; ZVFHMIN-NEXT: vl8re16.v v16, (a0) +; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: vmset.m v8 ; ZVFHMIN-NEXT: csrr a3, vlenb @@ -8160,25 +8151,24 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2 +; ZVFHMIN-NEXT: vxor.vx v8, v24, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: vmv4r.v v8, v16 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # 
Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -8187,35 +8177,32 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB281_2: -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8223,15 +8210,12 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -8261,54 +8245,53 @@ define @vfmsub_vf_nxv32f16( %va, half % ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v3, v0 -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v16, v16, a1, v0.t -; ZVFHMIN-NEXT: slli 
a1, a3, 1 -; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 -; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a4, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: slli a1, a3, 1 +; ZVFHMIN-NEXT: srli a3, a3, 2 +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 +; ZVFHMIN-NEXT: sltu a3, a0, a2 +; ZVFHMIN-NEXT: addi a3, a3, -1 +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -8527,55 +8510,53 @@ define @vfmsub_vf_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vmv.v.x v0, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a4, a1 +; 
ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v16, a1 +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: sltu a3, a0, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -8591,11 +8572,11 @@ define @vfmsub_vf_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -8778,27 +8759,17 @@ define @vfnmadd_vv_nxv32f16( %va, @vfnmadd_vv_nxv32f16( %va, @vfnmadd_vv_nxv32f16( %va, @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a2, a3, a2 ; ZVFHMIN-NEXT: csrr a3, vlenb 
-; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv8r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -9321,39 +9293,38 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: vmv.v.v v4, v12 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB290_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB290_2: ; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9362,20 +9333,17 @@ define @vfnmadd_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16, v0.t -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) 
# Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t -; ZVFHMIN-NEXT: vmv.v.v v16, v8 +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vmv4r.v v12, v4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 5 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -9539,56 +9507,63 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vmv.v.x v0, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a4, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vxor.vx v16, v16, a1 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a2 +; ZVFHMIN-NEXT: vxor.vx v16, v16, a2 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: sltu a3, a0, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 4 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v8, a2 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # 
Unknown-size Folded Spill ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv8r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: mv a3, a2 +; ZVFHMIN-NEXT: slli a2, a2, 1 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a0, a1, .LBB292_2 @@ -9599,14 +9574,17 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -9614,9 +9592,6 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -9658,56 +9633,54 @@ define @vfnmadd_vf_nxv32f16_unmasked_commute( @vfnmadd_vf_nxv32f16_unmasked_commute( @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v16, v24, a2, v0.t ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 @@ -9804,20 +9777,20 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a2, a3, a2 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; 
ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -9825,14 +9798,14 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t ; ZVFHMIN-NEXT: vmv.v.v v4, v12 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB294_2 ; ZVFHMIN-NEXT: # %bb.1: @@ -9840,7 +9813,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: .LBB294_2: ; ZVFHMIN-NEXT: vmv1r.v v0, v3 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -9849,7 +9822,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -9925,10 +9898,10 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v3, v0 -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a4, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; 
ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: sltu a3, a0, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a2 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -10921,54 +10883,53 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v3, v0 -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a4, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 +; 
ZVFHMIN-NEXT: sltu a3, a0, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -11052,55 +11013,53 @@ define @vfnmsub_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: fmv.x.h a2, fa0 -; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: vmv.v.x v0, a1 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a4, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a2 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: sub a4, a0, a1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: sub a2, a0, a1 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3 -; ZVFHMIN-NEXT: sltu a3, a0, a4 +; ZVFHMIN-NEXT: sltu a3, a0, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 -; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; 
ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: and a2, a3, a2 +; ZVFHMIN-NEXT: csrr a3, vlenb +; ZVFHMIN-NEXT: slli a3, a3, 3 +; ZVFHMIN-NEXT: add a3, sp, a3 +; ZVFHMIN-NEXT: addi a3, a3, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: addi a4, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a2 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 -; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: mv a4, a2 +; ZVFHMIN-NEXT: mv a3, a2 ; ZVFHMIN-NEXT: slli a2, a2, 1 -; ZVFHMIN-NEXT: add a2, a2, a4 +; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload @@ -11116,11 +11075,11 @@ define @vfnmsub_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 @@ -11171,55 +11130,53 @@ define @vfnmsub_vf_nxv32f16_unmasked_commute( @vfnmsub_vf_nxv32f16_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfma_vv_nxv16f64( %va, @vfsqrt_vv_nxv32bf16( %va, < ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: sltu a4, a0, a3 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -210,9 +210,9 @@ define @vfsqrt_vv_nxv32bf16_unmasked( @vfsqrt_vv_nxv32f16( %va, @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: sltu a4, a0, a3 ; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, 
mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 10a92f0188a93..36cb90e7cc2c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3109,9 +3109,9 @@ define @vmand_mm( %a, %b, ; NOVLOPT-NEXT: vmand.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmand_mm: @@ -3119,9 +3119,9 @@ define @vmand_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmand.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmand.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3136,9 +3136,9 @@ define @vmnand_mm( %a, %b, ; NOVLOPT-NEXT: vmnand.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmnand_mm: @@ -3146,9 +3146,9 @@ define @vmnand_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmnand.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmnand.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3163,9 +3163,9 @@ define @vmandn_mm( %a, %b, ; NOVLOPT-NEXT: vmandn.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmandn_mm: @@ -3173,9 +3173,9 @@ define @vmandn_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmandn.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmandn.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3190,9 +3190,9 @@ define @vmxor_mm( %a, %b, ; NOVLOPT-NEXT: vmxor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; 
; VLOPT-LABEL: vmxor_mm: @@ -3200,9 +3200,9 @@ define @vmxor_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmxor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmxor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3217,9 +3217,9 @@ define @vmor_mm( %a, %b, < ; NOVLOPT-NEXT: vmor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmor_mm: @@ -3227,9 +3227,9 @@ define @vmor_mm( %a, %b, < ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3245,9 +3245,9 @@ define @vmnor_mm( %a, %b, ; NOVLOPT-NEXT: vmnor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmnor_mm: @@ -3255,9 +3255,9 @@ define @vmnor_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmnor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmnor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3272,9 +3272,9 @@ define @vmorn_mm( %a, %b, ; NOVLOPT-NEXT: vmorn.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmorn_mm: @@ -3282,9 +3282,9 @@ define @vmorn_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmorn.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmorn.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3299,9 +3299,9 @@ define @vmxnor_mm( %a, %b, ; NOVLOPT-NEXT: vmxnor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; 
NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmxnor_mm: @@ -3309,9 +3309,9 @@ define @vmxnor_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmxnor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmxnor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.mir b/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.mir deleted file mode 100644 index 433d571080e37..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.mir +++ /dev/null @@ -1,22 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vmv0-elimination -verify-machineinstrs | FileCheck %s - -# If we peek through a COPY to vmv0, we may leave it dead. Make sure we delete -# it so the register allocator doesn't try to allocate it, which can lead to a -# regalloc error. -# https://github.com/llvm/llvm-project/pull/126850#issuecomment-2685166388 - ---- -name: dead_copy -body: | - bb.0: - liveins: $v8 - ; CHECK-LABEL: name: dead_copy - ; CHECK: liveins: $v8 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %x:vr = COPY $v8 - ; CHECK-NEXT: $v0 = COPY %x - ; CHECK-NEXT: PseudoVSE8_V_M1_MASK $noreg, $noreg, $v0, -1, 3 /* e8 */ - %x:vr = COPY $v8 - %y:vmv0 = COPY %x:vr - PseudoVSE8_V_M1_MASK $noreg, $noreg, %y:vmv0, -1, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll index 09d92c3c039f9..3e4638214a1d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -7,11 +7,11 @@ define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t @@ -46,11 +46,11 @@ define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t @@ -85,11 +85,11 @@ define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, 
mf2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t @@ -124,11 +124,11 @@ define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index 8e44d76e7010f..317ad7fc19dd7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -6,11 +6,11 @@ define @test_vp_reverse_nxv1i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t @@ -45,11 +45,11 @@ define @test_vp_reverse_nxv2i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t @@ -84,11 +84,11 @@ define @test_vp_reverse_nxv4i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t @@ -123,11 +123,11 @@ define @test_vp_reverse_nxv8i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t @@ -162,11 +162,11 @@ define @test_vp_reverse_nxv16i1_masked( %sr ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v16, v10, v12, v0.t @@ -202,11 +202,11 @@ define @test_vp_reverse_nxv32i1_masked( %sr ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; 
CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v12, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index 745cec4e7c4f6..15e20d0e3d402 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -12,9 +12,9 @@ define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %ev ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -36,9 +36,9 @@ define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -60,9 +60,9 @@ define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v10 @@ -85,9 +85,9 @@ define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %ev ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -109,9 +109,9 @@ define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -133,9 +133,9 @@ define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v10 @@ -158,9 +158,9 @@ define <8 x i1> 
@test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %ev ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -182,9 +182,9 @@ define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -206,9 +206,9 @@ define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v10 @@ -231,9 +231,9 @@ define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -255,9 +255,9 @@ define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -279,9 +279,9 @@ define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vmv1r.v v0, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 3b0b183537468..b75891bf0d77b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -15,9 +15,9 @@ define @test_vp_splice_nxv1i1( %va, @test_vp_splice_nxv1i1_negative_offset( @test_vp_splice_nxv1i1_masked( %va, @test_vp_splice_nxv2i1( %va, @test_vp_splice_nxv2i1_negative_offset( @test_vp_splice_nxv2i1_masked( %va, @test_vp_splice_nxv4i1( %va, @test_vp_splice_nxv4i1_negative_offset( @test_vp_splice_nxv4i1_masked( %va, @test_vp_splice_nxv8i1( %va, @test_vp_splice_nxv8i1_negative_offset( @test_vp_splice_nxv8i1_masked( %va, @test_vp_splice_nxv16i1( %va, 
@test_vp_splice_nxv16i1_negative_offset(
@test_vp_splice_nxv16i1_masked( %va,
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v14, 0
@@ -381,8 +381,8 @@ define @test_vp_splice_nxv32i1( %va,
@test_vp_splice_nxv32i1_negative_offset(
@test_vp_splice_nxv32i1_masked( %va,
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
@@ -455,8 +455,8 @@ define @test_vp_splice_nxv64i1( %va,
@test_vp_splice_nxv64i1_negative_offset(
@test_vp_splice_nxv64i1_masked( %va,
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vaaddu.ll
index 989fbb7fcea8b..165bf3db3413e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vaaddu.ll
@@ -131,8 +131,8 @@ define @vaaddu_8( %x, %y,
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8, v0.t
-; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vzext.vf2 v8, v9, v0.t
+; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vaaddu.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%xz = call @llvm.vp.zext.nxv2i32.nxv2i8( %x, %m, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index d6e1af59e6341..db60973e3a08e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -342,13 +342,13 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( %
; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vwaddu.vv v12, v11, v11
; RV32-NEXT: vwmaccu.vx v12, a1, v11
+; RV32-NEXT: add a1, a3, a3
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vx v11, v12, a3
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
-; RV32-NEXT: add a1, a3, a3
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vx v10, v9, a3
@@ -382,19 +382,19 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( %
; RV64-NEXT: srli a3, a3, 2
; RV64-NEXT: vwaddu.vv v12, v11, v11
; RV64-NEXT: vwmaccu.vx v12, a2, v11
+; RV64-NEXT: add a1, a3, a3
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; RV64-NEXT: vslidedown.vx v11, v12, a3
; RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT: vmsne.vi v0, v11, 0
-; RV64-NEXT: add a1, a3, a3
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vx v10, v9, a3
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: srli a1, a4, 32
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a1, 32
@@ -674,32 +674,32 @@ define {, } @not_same_mask(
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v9, v0
-; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: li a2, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmv.v.i v11, 0
; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: vmerge.vim v11, v8, 1, v0
+; RV32-NEXT: vmerge.vim v8, v10, 1, v0
; RV32-NEXT: vmv1r.v v0, v9
-; RV32-NEXT: vmerge.vim v9, v8, 1, v0
+; RV32-NEXT: vmerge.vim v9, v10, 1, v0
; RV32-NEXT: srli a3, a3, 2
-; RV32-NEXT: vwaddu.vv v12, v9, v11
-; RV32-NEXT: vwmaccu.vx v12, a2, v11
-; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v9, v12, a3
-; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; RV32-NEXT: vmsne.vi v0, v9, 0
+; RV32-NEXT: vwaddu.vv v12, v9, v8
+; RV32-NEXT: vwmaccu.vx v12, a2, v8
; RV32-NEXT: add a2, a3, a3
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v8, v12, a3
+; RV32-NEXT: vmerge.vim v9, v11, 1, v0
+; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vmsne.vi v0, v8, 0
+; RV32-NEXT: vmerge.vim v8, v10, 1, v0
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v8, a3
+; RV32-NEXT: vslideup.vx v9, v8, a3
; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vmsne.vi v0, v9, 0
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
@@ -711,33 +711,33 @@ define {, } @not_same_mask(
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv1r.v v9, v0
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: li a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vmv.v.i v11, 0
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a1, a1, 33
+; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: vmerge.vim v11, v8, 1, v0
+; RV64-NEXT: vmerge.vim v8, v10, 1, v0
; RV64-NEXT: vmv1r.v v0, v9
-; RV64-NEXT: vmerge.vim v9, v8, 1, v0
+; RV64-NEXT: vmerge.vim v9, v10, 1, v0
; RV64-NEXT: srli a3, a3, 2
-; RV64-NEXT: vwaddu.vv v12, v9, v11
-; RV64-NEXT: vwmaccu.vx v12, a2, v11
-; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v9, v12, a3
-; RV64-NEXT: vmerge.vim v10, v10, 1, v0
-; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; RV64-NEXT: vmsne.vi v0, v9, 0
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: vwaddu.vv v12, v9, v8
+; RV64-NEXT: vwmaccu.vx v12, a2, v8
; RV64-NEXT: add a2, a3, a3
-; RV64-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vx v8, v12, a3
+; RV64-NEXT: vmerge.vim v9, v11, 1, v0
+; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT: vmsne.vi v0, v8, 0
+; RV64-NEXT: vmerge.vim v8, v10, 1, v0
; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
-; RV64-NEXT: vslideup.vx v10, v8, a3
+; RV64-NEXT: vslideup.vx v9, v8, a3
; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: vmsne.vi v0, v9, 0
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0), v0.t
; RV64-NEXT: li a0, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index b6ec7906885ff..e5b34775d838c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -2435,11 +2435,11 @@ define @vpgather_nxv16f64( %ptrs,
@vpgather_nxv16f64( %ptrs,
@vpgather_baseidx_nxv16i16_nxv16f64(ptr %base,
@vpgather_baseidx_nxv16i16_nxv16f64(ptr %base,
@vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base
; RV32-NEXT: srli a2, a2, 3
; RV32-NEXT: sltu a1, a1, a3
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: and a1, a1, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
@@ -2567,12 +2567,12 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: sub a3, a1, a2
-; RV64-NEXT: srli a4, a2, 3
-; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: sltu a4, a1, a3
; RV64-NEXT: addi a4, a4, -1
; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: srli a4, a2, 3
+; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
@@ -2610,9 +2610,9 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
; RV32-NEXT: srli a2, a2, 3
; RV32-NEXT: sltu a1, a1, a3
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: and a1, a1, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
@@ -2634,9 +2634,9 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base
; RV64-NEXT: srli a2, a2, 3
; RV64-NEXT: sltu a1, a1, a3
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a2
; RV64-NEXT: and a1, a1, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index edfa4a7560949..25e1c61bd2f0d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -527,12 +527,12 @@ define @vpload_nxv16f64(ptr %ptr, %m,
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: sub a3, a1, a2
; CHECK-NEXT: slli a4, a2, 3
-; CHECK-NEXT: srli a5, a2, 3
-; CHECK-NEXT: vslidedown.vx v0, v0, a5
; CHECK-NEXT: sltu a5, a1, a3
; CHECK-NEXT: addi a5, a5, -1
; CHECK-NEXT: and a3, a5, a3
+; CHECK-NEXT: srli a5, a2, 3
; CHECK-NEXT: add a4, a0, a4
+; CHECK-NEXT: vslidedown.vx v0, v0, a5
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a4), v0.t
; CHECK-NEXT: bltu a1, a2, .LBB44_2
@@ -591,9 +591,9 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, %val,
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: sltu a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
@@ -2301,9 +2301,9 @@ define void @vpscatter_nxv16f64( %val,
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a0
; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
@@ -2338,9 +2338,9 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
@@ -2373,9 +2373,9 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
@@ -2411,9 +2411,9 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
@@ -2446,9 +2446,9 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
@@ -2485,9 +2485,9 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
@@ -2510,9 +2510,9 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
-; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
+; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 3b406656a4dd6..5171194c3f8b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -439,15 +439,15 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, %a, %ma, %mb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index c94516d187409..8aec47020b0a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -364,15 +364,15 @@ define @select_nxv32i32( %a,
@select_evl_nxv32i32( %a,
@select_nxv16f64( %a,
@vwadd_wv_mask_v8i32( %x, %y) {
; CHECK-LABEL: vwadd_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwadd.wv v8, v8, v24, v0.t
+; CHECK-NEXT: vwadd.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt %x, splat (i32 42)
%a = select %mask, %x, zeroinitializer
@@ -23,13 +22,12 @@ define @vwadd_wv_mask_v8i32( %x,
@vwaddu_wv_mask_v8i32( %x, %y) {
; CHECK-LABEL: vwaddu_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwaddu.wv v8, v8, v24, v0.t
+; CHECK-NEXT: vwaddu.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt %x, splat (i32 42)
%a = select %mask, %x, zeroinitializer
@@ -60,13 +58,12 @@ define @vwaddu_vv_mask_v8i32( %x,
@vwadd_wv_mask_v8i32_commutative( %x, %y) {
; CHECK-LABEL: vwadd_wv_mask_v8i32_commutative:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwadd.wv v8, v8, v24, v0.t
+; CHECK-NEXT: vwadd.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt %x, splat (i32 42)
%a = select %mask, %x, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
index 22f9f97373415..2d3a3e16bd9e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -5,13 +5,12 @@ define @vwsub_wv_mask_v8i32( %x, %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwsub.wv v8, v8, v24, v0.t
+; CHECK-NEXT: vwsub.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt %x, splat (i32 42)
%a = select %mask, %x, zeroinitializer
@@ -23,13 +22,12 @@ define @vwsub_wv_mask_v8i32( %x,
@vwsubu_wv_mask_v8i32( %x, %y) {
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv4r.v v24, v8
; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
-; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
-; CHECK-NEXT: vwsubu.wv v8, v8, v24, v0.t
+; CHECK-NEXT: vwsubu.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt %x, splat (i32 42)
%a = select %mask, %x, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index c73a18c8869d5..08e0fafce254d 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -538,15 +538,9 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-NEXT: srli a1, a1, 21
; RV32MV-NEXT: vslide1down.vx v10, v10, a1
; RV32MV-NEXT: li a1, 2047
-; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT: vmv.v.i v11, 1
+; RV32MV-NEXT: addi a3, a3, -1527
; RV32MV-NEXT: andi a2, a2, 2047
-; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32MV-NEXT: vslide1down.vx v10, v10, a2
-; RV32MV-NEXT: lui a2, %hi(.LCPI4_1)
-; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
-; RV32MV-NEXT: addi a3, a3, -1527
-; RV32MV-NEXT: vsext.vf2 v12, v11
; RV32MV-NEXT: vslidedown.vi v10, v10, 1
; RV32MV-NEXT: vsub.vv v8, v10, v8
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -556,14 +550,20 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmul.vv v8, v8, v9
; RV32MV-NEXT: vadd.vv v9, v8, v8
; RV32MV-NEXT: vsll.vv v9, v9, v11
-; RV32MV-NEXT: vle16.v v10, (a2)
+; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32MV-NEXT: vmv.v.i v10, 1
+; RV32MV-NEXT: lui a2, %hi(.LCPI4_1)
+; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
+; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32MV-NEXT: vsext.vf2 v11, v10
; RV32MV-NEXT: vand.vx v8, v8, a1
-; RV32MV-NEXT: vsrl.vv v8, v8, v12
+; RV32MV-NEXT: vsrl.vv v8, v8, v11
+; RV32MV-NEXT: vle16.v v10, (a2)
; RV32MV-NEXT: vor.vv v8, v8, v9
+; RV32MV-NEXT: vmv.v.i v9, 0
; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vmsltu.vv v0, v10, v8
-; RV32MV-NEXT: vmv.v.i v8, 0
-; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
+; RV32MV-NEXT: vmerge.vim v8, v9, -1, v0
; RV32MV-NEXT: vslidedown.vi v9, v8, 2
; RV32MV-NEXT: vmv.x.s a1, v8
; RV32MV-NEXT: vslidedown.vi v8, v8, 1
@@ -599,15 +599,9 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: srli a2, a2, 53
; RV64MV-NEXT: vslide1down.vx v10, v10, a2
; RV64MV-NEXT: li a2, 2047
-; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64MV-NEXT: vmv.v.i v11, 1
+; RV64MV-NEXT: addi a3, a3, -1527
; RV64MV-NEXT: srli a1, a1, 22
-; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64MV-NEXT: vslide1down.vx v10, v10, a1
-; RV64MV-NEXT: lui a1, %hi(.LCPI4_1)
-; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_1)
-; RV64MV-NEXT: addi a3, a3, -1527
-; RV64MV-NEXT: vsext.vf2 v12, v11
; RV64MV-NEXT: vslidedown.vi v10, v10, 1
; RV64MV-NEXT: vsub.vv v8, v10, v8
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -617,14 +611,20 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: vmul.vv v8, v8, v9
; RV64MV-NEXT: vadd.vv v9, v8, v8
; RV64MV-NEXT: vsll.vv v9, v9, v11
-; RV64MV-NEXT: vle16.v v10, (a1)
+; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64MV-NEXT: vmv.v.i v10, 1
+; RV64MV-NEXT: lui a1, %hi(.LCPI4_1)
+; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_1)
+; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64MV-NEXT: vsext.vf2 v11, v10
; RV64MV-NEXT: vand.vx v8, v8, a2
-; RV64MV-NEXT: vsrl.vv v8, v8, v12
+; RV64MV-NEXT: vsrl.vv v8, v8, v11
+; RV64MV-NEXT: vle16.v v10, (a1)
; RV64MV-NEXT: vor.vv v8, v8, v9
+; RV64MV-NEXT: vmv.v.i v9, 0
; RV64MV-NEXT: vand.vx v8, v8, a2
; RV64MV-NEXT: vmsltu.vv v0, v10, v8
-; RV64MV-NEXT: vmv.v.i v8, 0
-; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0
+; RV64MV-NEXT: vmerge.vim v8, v9, -1, v0
; RV64MV-NEXT: vmv.x.s a1, v8
; RV64MV-NEXT: vslidedown.vi v9, v8, 1
; RV64MV-NEXT: vslidedown.vi v8, v8, 2