diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 2a13c4820f389..52b5919b7e401 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -149,6 +149,14 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { MadeChange |= doPeepholeMergeVVMFold(); + // After we're done with everything else, convert IMPLICIT_DEF + // passthru operands to NoRegister. This is required to work around + // an optimization deficiency in MachineCSE. This really should + // be merged back into each of the patterns (i.e. there's no good + // reason not to go directly to NoReg), but is being done this way + // to allow easy backporting. + MadeChange |= doPeepholeNoRegPassThru(); + if (MadeChange) CurDAG->RemoveDeadNodes(); } @@ -3593,6 +3601,44 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { return MadeChange; } +/// If our passthru is an implicit_def, use noreg instead. This +/// sidesteps issues with MachineCSE not being able to CSE expressions with +/// IMPLICIT_DEF operands while preserving the semantic intent. See +/// pr64282 for context. Note that this transform is the last one +/// performed at ISEL DAG to DAG. +bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { + bool MadeChange = false; + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + const unsigned Opc = N->getMachineOpcode(); + if (!RISCVVPseudosTable::getPseudoInfo(Opc) || + !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) || + !isImplicitDef(N->getOperand(0))) + continue; + + SmallVector<SDValue> Ops; + Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0))); + for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { + SDValue Op = N->getOperand(I); + Ops.push_back(Op); + } + + MachineSDNode *Result = + CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + Result->setFlags(N->getFlags()); + CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands()); + ReplaceUses(N, Result); + MadeChange = true; + } + return MadeChange; +} + + // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index a2e5c50c370c2..fbc1520a54ba0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -187,6 +187,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool doPeepholeSExtW(SDNode *Node); bool doPeepholeMaskedRVV(MachineSDNode *Node); bool doPeepholeMergeVVMFold(); + bool doPeepholeNoRegPassThru(); bool performVMergeToVMv(SDNode *N); bool performCombineVMergeAndVOps(SDNode *N); }; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index f1ebe63cfa145..41fd1e1b57e3c 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -160,9 +160,13 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI, // lanes are undefined. return true; - // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands - // are solely IMPLICIT_DEFS), the pass through lanes are undefined.
+ // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose + // operands are solely IMPLICIT_DEFS, then the pass through lanes are + // undefined. const MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == RISCV::NoRegister) + return true; + if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) { if (UseMI->isImplicitDef()) return true; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 30a8b43880337..483da01276de3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -52,16 +52,16 @@ /// /// Currently, the policy is represented via the following instrinsic families: /// * _MASK - Can represent all three policy states for both tail and mask. If -/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise, -/// policy operand and tablegen flags drive the interpretation. (If policy -/// operand is not present - there are a couple, thought we're rapidly -/// removing them - a non-undefined policy defaults to "tail agnostic", and -/// "mask undisturbed". Since this is the only variant with a mask, all -/// other variants are "mask undefined". +/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". +/// Otherwise, policy operand and tablegen flags drive the interpretation. +/// (If policy operand is not present - there are a couple, though we're +/// rapidly removing them - a non-undefined policy defaults to "tail +/// agnostic", and "mask undisturbed". Since this is the only variant with +/// a mask, all other variants are "mask undefined". /// * Unsuffixed w/ both passthrough and policy operand. Can represent all -/// three policy states. If passthrough is IMPLICIT_DEF, then represents -/// "undefined". Otherwise, policy operand and tablegen flags drive the -/// interpretation. +/// three policy states. If passthrough is IMPLICIT_DEF (or NoReg), then +/// represents "undefined". Otherwise, policy operand and tablegen flags +/// drive the interpretation. /// * Unsuffixed w/o passthrough or policy operand -- Does not have a /// passthrough operand, and thus represents the "undefined" state. Note /// that terminology in code frequently refers to these as "TA" which is @@ -70,8 +70,8 @@ /// * _TU w/o policy operand -- Has a passthrough operand, and always /// represents the tail undisturbed state. /// * _TU w/policy operand - Can represent all three policy states. If -/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise, -/// policy operand and tablegen flags drive the interpretation. +/// passthrough is IMPLICIT_DEF (or NoReg), then represents "undefined". +/// Otherwise, policy operand and tablegen flags drive the interpretation. /// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp index 2fd5ed0d018d3..7c6a89b6036fa 100644 --- a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp +++ b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp @@ -9,7 +9,8 @@ // This file implements a function pass that initializes undef vector value to // temporary pseudo instruction and remove it in expandpseudo pass to prevent // register allocation resulting in a constraint violated result for vector -// instruction. +// instruction. It also rewrites the NoReg tied operand back to an +// IMPLICIT_DEF.
// // RISC-V vector instruction has register overlapping constraint for certain // instructions, and will cause illegal instruction trap if violated, we use @@ -30,6 +31,12 @@ // // See also: https://github.com/llvm/llvm-project/issues/50157 // +// Additionally, this pass rewrites tied operands of vector instructions +// from NoReg to IMPLICIT_DEF. (Note that this is a non-overlapping set of +// operands to the above.) We use NoReg to sidestep a MachineCSE +// optimization quality problem but need to convert back before +// TwoAddressInstruction. See pr64282 for context. +// //===----------------------------------------------------------------------===// #include "RISCV.h" @@ -244,6 +251,26 @@ bool RISCVInitUndef::processBasicBlock(MachineFunction &MF, bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { MachineInstr &MI = *I; + + // If we used NoReg to represent the passthru, switch this back to being + // an IMPLICIT_DEF before TwoAddressInstructions. + unsigned UseOpIdx; + if (MI.getNumDefs() != 0 && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { + MachineOperand &UseMO = MI.getOperand(UseOpIdx); + if (UseMO.getReg() == RISCV::NoRegister) { + const TargetRegisterClass *RC = + TII->getRegClass(MI.getDesc(), UseOpIdx, TRI, MF); + Register NewDest = MRI->createVirtualRegister(RC); + // We don't have a way to update dead lanes, so keep track of the + // new register so that we avoid querying it later. + NewRegs.insert(NewDest); + BuildMI(MBB, I, I->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), NewDest); + UseMO.setReg(NewDest); + Changed = true; + } + } + if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI)) Changed |= handleSubReg(MF, MI, DLD); if (MI.isImplicitDef()) { diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 59dac5c7b57d8..d4fd66c9b360c 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -273,6 +273,7 @@ class RISCVPassConfig : public TargetPassConfig { void addPreRegAlloc() override; void addPostRegAlloc() override; void addOptimizedRegAlloc() override; + void addFastRegAlloc() override; }; } // namespace @@ -392,12 +393,17 @@ void RISCVPassConfig::addPreRegAlloc() { } void RISCVPassConfig::addOptimizedRegAlloc() { - if (getOptimizeRegAlloc()) - insertPass(&DetectDeadLanesID, &RISCVInitUndefID); + insertPass(&DetectDeadLanesID, &RISCVInitUndefID); TargetPassConfig::addOptimizedRegAlloc(); } +void RISCVPassConfig::addFastRegAlloc() { + addPass(createRISCVInitUndefPass()); + TargetPassConfig::addFastRegAlloc(); +} + + void RISCVPassConfig::addPostRegAlloc() { if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) addPass(createRISCVRedundantCopyEliminationPass()); diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 7a6e027057a97..01c7613201854 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -42,6 +42,7 @@ ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Insert VSETVLI pass ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass +; CHECK-NEXT: RISC-V init undef pass ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass ; CHECK-NEXT: Fast Register Allocator diff --git a/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll b/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll index 776ad183f3d90..3e2af11365297 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-vector-on-stack.ll @@ -17,11 +17,11 @@ define void @bar() nounwind { ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: mv s1, sp ; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: addi a0, s1, 64 -; CHECK-NEXT: sd a0, 0(sp) -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: addi a0, s1, 64 ; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: sd a0, 0(sp) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: li a2, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index ddcc787468675..fe45772fab4f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -103,15 +103,15 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) { define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: ret @@ -122,15 +122,15 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) @@ -157,15 +157,15 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv128: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_1) diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll index 5598935bb5110..b9b691662b0e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1094,16 +1094,16 @@ define @bitreverse_nxv1i64( %va) { ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, 
-256 -; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: vand.vx v10, v10, a2 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v11 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsll.vx v10, v8, a0 ; RV32-NEXT: vand.vx v12, v8, a2 @@ -1142,35 +1142,35 @@ define @bitreverse_nxv1i64( %va) { ; ; RV64-LABEL: bitreverse_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsll.vi v9, v9, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v10, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v11, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v11, v11, a4 -; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vsrl.vx v10, v8, a2 -; RV64-NEXT: vsrl.vx v11, v8, a4 -; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsrl.vi v11, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v11, v11, a4 ; RV64-NEXT: vor.vv v10, v11, v10 -; RV64-NEXT: vsrl.vi v11, v8, 24 -; RV64-NEXT: vand.vx v11, v11, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v11 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vand.vx v10, v8, a3 +; RV64-NEXT: vsll.vi v10, v10, 24 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 8 +; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsll.vx v11, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 @@ -1237,16 +1237,16 @@ define @bitreverse_nxv2i64( %va) { ; RV32-NEXT: vsrl.vx v12, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: vand.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a0 ; RV32-NEXT: vand.vx v16, v8, a2 @@ -1285,35 +1285,35 @@ define @bitreverse_nxv2i64( %va) { ; ; RV64-LABEL: bitreverse_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 
4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v12, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v14, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v14, v14, a4 -; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsrl.vx v10, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v12, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsrl.vx v12, v8, a2 -; RV64-NEXT: vsrl.vx v14, v8, a4 -; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsrl.vi v14, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v14, v14, a4 ; RV64-NEXT: vor.vv v12, v14, v12 -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: vand.vx v14, v14, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v14 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: vsll.vi v14, v14, 8 +; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsll.vx v14, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 @@ -1380,16 +1380,16 @@ define @bitreverse_nxv4i64( %va) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v20 -; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v24, v24, v20 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsll.vx v16, v8, a0 ; RV32-NEXT: vand.vx v24, v8, a2 @@ -1428,35 +1428,35 @@ define @bitreverse_nxv4i64( %va) { ; ; RV64-LABEL: bitreverse_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v16, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v20, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v20, v20, a4 -; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsrl.vx v12, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v16, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vsrl.vx 
v16, v8, a2 -; RV64-NEXT: vsrl.vx v20, v8, a4 -; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsrl.vi v20, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v20, v20, a4 ; RV64-NEXT: vor.vv v16, v20, v16 -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: vand.vx v20, v20, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v20 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v20, v8, a4 +; RV64-NEXT: vsll.vi v20, v20, 8 +; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsll.vx v20, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 @@ -1524,37 +1524,37 @@ define @bitreverse_nxv8i64( %va) { ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a0 ; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v0, v0, v24 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a2 -; RV32-NEXT: vsll.vx v0, v0, a1 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: vsll.vx v24, v24, a1 ; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: vor.vv v0, v16, v0 -; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vor.vv v24, v16, v24 +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 40 ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vor.vv v8, v0, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v24 @@ -1585,35 +1585,35 @@ define @bitreverse_nxv8i64( %va) { ; ; RV64-LABEL: bitreverse_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v24, v8, a1 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v24, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, 
-256 -; RV64-NEXT: vand.vx v0, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v0, v0, a4 -; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vx v16, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v24, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vsrl.vx v0, v8, a4 -; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: vsrl.vi v24, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vsrl.vi v0, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 ; RV64-NEXT: vor.vv v24, v0, v24 -; RV64-NEXT: vsrl.vi v0, v8, 24 -; RV64-NEXT: vand.vx v0, v0, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsll.vx v0, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: lui a0, 61681 ; RV64-NEXT: addiw a0, a0, -241 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 05361f48cd511..15e8566ff8ba5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2208,35 +2208,35 @@ define @vp_bitreverse_nxv1i64( %va, @vp_bitreverse_nxv1i64_unmasked( %va ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsll.vx v9, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v9, v10 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vx v10, v8, a4 +; RV32-NEXT: vsll.vi v10, v10, 24 ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12 -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v11, v8, a3 -; RV32-NEXT: vsll.vx v11, v11, a2 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vx v11, v8, a4 -; RV32-NEXT: vsll.vi v11, v11, 24 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v11, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vor.vv 
v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma @@ -2512,35 +2512,35 @@ define @vp_bitreverse_nxv2i64( %va, @vp_bitreverse_nxv2i64_unmasked( %va ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma @@ -2816,35 +2816,35 @@ define @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv4i64_unmasked( %va ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: 
vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma @@ -3125,73 +3125,73 @@ define @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64_unmasked( %va ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3507,73 +3507,73 @@ define @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64_unmasked( %va ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; 
RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll index 83c740d6cda4d..25bee211fb2b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -348,16 +348,16 @@ define @bswap_nxv1i64( %va) { ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: vand.vx v10, v10, a2 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v11 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v11, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsll.vx v10, v8, a0 ; RV32-NEXT: vand.vx v12, v8, a2 @@ -375,35 +375,35 @@ define @bswap_nxv1i64( %va) { ; ; RV64-LABEL: bswap_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vand.vx v9, v8, a0 -; RV64-NEXT: vsll.vi v9, v9, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; 
RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v10, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v11, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v11, v11, a4 -; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vsrl.vx v10, v8, a2 -; RV64-NEXT: vsrl.vx v11, v8, a4 -; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsrl.vi v11, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v11, v11, a4 ; RV64-NEXT: vor.vv v10, v11, v10 -; RV64-NEXT: vsrl.vi v11, v8, 24 -; RV64-NEXT: vand.vx v11, v11, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v11 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vand.vx v10, v8, a3 +; RV64-NEXT: vsll.vi v10, v10, 24 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 8 +; RV64-NEXT: vor.vv v10, v10, v11 +; RV64-NEXT: vsll.vx v11, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv1i64: @@ -431,16 +431,16 @@ define @bswap_nxv2i64( %va) { ; RV32-NEXT: vsrl.vx v12, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: vand.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v14, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a0 ; RV32-NEXT: vand.vx v16, v8, a2 @@ -458,35 +458,35 @@ define @bswap_nxv2i64( %va) { ; ; RV64-LABEL: bswap_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v12, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v14, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v14, v14, a4 -; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsrl.vx v10, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v12, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v12, v12, a2 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsrl.vx v12, v8, a2 -; RV64-NEXT: vsrl.vx v14, v8, a4 -; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsrl.vi v14, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, 
a4, 24 +; RV64-NEXT: vand.vx v14, v14, a4 ; RV64-NEXT: vor.vv v12, v14, v12 -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: vand.vx v14, v14, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v14 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: vsll.vi v14, v14, 8 +; RV64-NEXT: vor.vv v12, v12, v14 +; RV64-NEXT: vsll.vx v14, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv2i64: @@ -514,16 +514,16 @@ define @bswap_nxv4i64( %va) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v20 -; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v24, v24, v20 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsll.vx v16, v8, a0 ; RV32-NEXT: vand.vx v24, v8, a2 @@ -541,35 +541,35 @@ define @bswap_nxv4i64( %va) { ; ; RV64-LABEL: bswap_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v16, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v20, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v20, v20, a4 -; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsrl.vx v12, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v16, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vsrl.vx v16, v8, a2 -; RV64-NEXT: vsrl.vx v20, v8, a4 -; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsrl.vi v20, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v20, v20, a4 ; RV64-NEXT: vor.vv v16, v20, v16 -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: vand.vx v20, v20, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v20 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v20, v8, a4 +; RV64-NEXT: vsll.vi v20, v20, 8 +; RV64-NEXT: vor.vv v16, v16, v20 +; RV64-NEXT: vsll.vx v20, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv4i64: @@ -596,37 +596,37 @@ define @bswap_nxv8i64( %va) { ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v24, v8, a0 +; RV32-NEXT: vsrl.vx v16, v8, a0 ; RV32-NEXT: li a1, 40 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a2 ; RV32-NEXT: vsll.vx v0, v0, a1 -; RV32-NEXT: vsll.vx v24, v8, a0 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -638,35 +638,35 @@ define @bswap_nxv8i64( %va) { ; ; RV64-LABEL: bswap_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: li a1, 255 -; RV64-NEXT: slli a1, a1, 24 -; RV64-NEXT: vand.vx v24, v8, a1 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v24, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v0, v8, a3 -; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v0, v0, a4 -; RV64-NEXT: vor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vx v16, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v24, v8, a1 +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -256 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsrl.vx v24, v8, a2 -; RV64-NEXT: vsrl.vx v0, v8, a4 -; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: vsrl.vi v24, v8, 24 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vsrl.vi v0, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 ; RV64-NEXT: vor.vv v24, v0, v24 -; RV64-NEXT: vsrl.vi v0, v8, 24 -; RV64-NEXT: vand.vx v0, v0, a0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: 
vor.vv v24, v24, v0 +; RV64-NEXT: vsll.vx v0, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 244236fe77749..be79222b5c5a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -700,35 +700,35 @@ define @vp_bswap_nxv1i64( %va, @vp_bswap_nxv1i64_unmasked( %va, i32 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsll.vx v9, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v9, v10 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vx v10, v8, a4 +; RV32-NEXT: vsll.vi v10, v10, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v11, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12 -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v11, v8, a3 -; RV32-NEXT: vsll.vx v11, v11, a2 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vx v11, v8, a4 -; RV32-NEXT: vsll.vi v11, v11, 24 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v11, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -872,35 +872,35 @@ define @vp_bswap_nxv2i64( %va, @vp_bswap_nxv2i64_unmasked( %va, i32 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v14, (a5), zero ; 
RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1044,35 +1044,35 @@ define @vp_bswap_nxv4i64( %va, @vp_bswap_nxv4i64_unmasked( %va, i32 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a4 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1221,73 +1221,73 @@ define @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64_unmasked( %va, i32 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; 
RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1470,73 +1470,73 @@ define @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64_unmasked( %va, i32 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v0, v16, 24 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vor.vv v16, v16, v0 -; 
RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v0, v8, a3 +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1841,35 +1841,35 @@ define @vp_bswap_nxv1i48( %va, @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32 ; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v24, v8, v24 -; CHECK-NEXT: vadd.vv v8, v0, v16 -; CHECK-NEXT: vadd.vx v8, v8, a4 -; CHECK-NEXT: vadd.vx v16, v24, a4 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vadd.vv v24, v0, v16 +; CHECK-NEXT: vadd.vx v16, v8, a4 +; CHECK-NEXT: vadd.vx v8, v24, a4 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll b/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll index 8e4618d34ad2c..00e1be1765035 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cross-block-cse.ll @@ -1,18 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -O3 -mtriple=riscv64 -mattr=+v | FileCheck %s -; TODO: The case below demonstrates a regression in cross block CSE of vector -; instructions with undefined passthru operands. The second vadd.vv should be -; removed. +; The case below demonstrates cross block CSE of vector instructions with +; undefined passthru operands. 
define void @foo( %x, %y, ptr %p1, ptr %p2, i1 zeroext %cond) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; CHECK-NEXT: vadd.vv v10, v8, v9 -; CHECK-NEXT: vs1r.v v10, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: bnez a2, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %falsebb -; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vs1r.v v8, (a1) ; CHECK-NEXT: .LBB0_2: # %mergebb ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index d47de77fb4c12..182b667a0b9af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2752,12 +2752,13 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb -; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -3105,73 +3080,67 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a2, a1, 257 -; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: .LBB47_2: +; RV32-NEXT: sub a2, a0, a1 +; RV32-NEXT: sltu a3, a0, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vsrl.vi v24, v16, 1 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: vlse64.v v0, (a3), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: vsub.vv v16, v16, v24 ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vadd.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v24, v16, v24 ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetvli a4, 
zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v16, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v16, a2 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: sub a1, a0, a1 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsrl.vx v16, v16, a2 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: bltu a0, a1, .LBB47_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB47_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 @@ -3180,11 +3149,11 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v16, v8, v16 -; RV32-NEXT: vand.vv v8, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb @@ -3193,18 +3162,17 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v8, v8, a2 +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index 03eaf5e60c7b6..83cc79bfc191a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1536,19 +1536,19 @@ define @cttz_nxv1i64( %va) { ; RV32F-LABEL: cttz_nxv1i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v9, v8, 0 -; RV32F-NEXT: vand.vv v9, v8, v9 +; RV32F-NEXT: vand.vv v8, v8, v9 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v10, v9 -; RV32F-NEXT: vsrl.vi v9, v10, 23 +; RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; RV32F-NEXT: vsrl.vi v8, v9, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV32F-NEXT: vzext.vf2 v10, v9 +; RV32F-NEXT: vzext.vf2 v9, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v9, v10, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v9, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -1574,17 +1574,17 @@ define @cttz_nxv1i64( %va) { ; RV32D-LABEL: cttz_nxv1i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v9, v8, 0 -; RV32D-NEXT: vand.vv v9, v8, v9 +; RV32D-NEXT: vand.vv v8, v8, v9 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v9, v9 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v9, v9, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v9, v9, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v9, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; @@ -1706,19 +1706,19 @@ define @cttz_nxv2i64( %va) { ; RV32F-LABEL: cttz_nxv2i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v10, v8, 0 -; RV32F-NEXT: vand.vv v10, v8, v10 +; RV32F-NEXT: vand.vv v8, v8, v10 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v12, v10 -; RV32F-NEXT: vsrl.vi v10, v12, 23 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: vsrl.vi v8, v10, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32F-NEXT: vzext.vf2 v12, v10 +; RV32F-NEXT: vzext.vf2 v10, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v10, v12, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v10, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -1744,17 +1744,17 @@ define @cttz_nxv2i64( %va) { ; RV32D-LABEL: cttz_nxv2i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v10, v8, 0 -; RV32D-NEXT: vand.vv v10, v8, v10 +; RV32D-NEXT: vand.vv v8, v8, v10 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v10, v10 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v10, v10, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v10, v10, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; @@ -1876,19 +1876,19 @@ define @cttz_nxv4i64( %va) { ; RV32F-LABEL: cttz_nxv4i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, 
zero ; RV32F-NEXT: vrsub.vi v12, v8, 0 -; RV32F-NEXT: vand.vv v12, v8, v12 +; RV32F-NEXT: vand.vv v8, v8, v12 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v16, v12 -; RV32F-NEXT: vsrl.vi v12, v16, 23 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: vsrl.vi v8, v12, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32F-NEXT: vzext.vf2 v16, v12 +; RV32F-NEXT: vzext.vf2 v12, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v12, v16, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v12, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v12, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -1914,17 +1914,17 @@ define @cttz_nxv4i64( %va) { ; RV32D-LABEL: cttz_nxv4i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v12, v8, 0 -; RV32D-NEXT: vand.vv v12, v8, v12 +; RV32D-NEXT: vand.vv v8, v8, v12 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v12, v12 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v12, v12, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v12, v12, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v12, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; @@ -2046,19 +2046,19 @@ define @cttz_nxv8i64( %va) { ; RV32F-LABEL: cttz_nxv8i64: ; RV32F: # %bb.0: ; RV32F-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32F-NEXT: vmseq.vx v0, v8, zero ; RV32F-NEXT: vrsub.vi v16, v8, 0 -; RV32F-NEXT: vand.vv v16, v8, v16 +; RV32F-NEXT: vand.vv v8, v8, v16 ; RV32F-NEXT: fsrmi a0, 1 ; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32F-NEXT: vfncvt.f.xu.w v24, v16 -; RV32F-NEXT: vsrl.vi v16, v24, 23 +; RV32F-NEXT: vfncvt.f.xu.w v16, v8 +; RV32F-NEXT: vsrl.vi v8, v16, 23 ; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32F-NEXT: vzext.vf2 v24, v16 +; RV32F-NEXT: vzext.vf2 v16, v8 ; RV32F-NEXT: li a1, 127 -; RV32F-NEXT: vsub.vx v16, v24, a1 -; RV32F-NEXT: vmseq.vx v0, v8, zero +; RV32F-NEXT: vsub.vx v8, v16, a1 ; RV32F-NEXT: li a1, 64 -; RV32F-NEXT: vmerge.vxm v8, v16, a1, v0 +; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32F-NEXT: fsrm a0 ; RV32F-NEXT: ret ; @@ -2084,17 +2084,17 @@ define @cttz_nxv8i64( %va) { ; RV32D-LABEL: cttz_nxv8i64: ; RV32D: # %bb.0: ; RV32D-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32D-NEXT: vmseq.vx v0, v8, zero ; RV32D-NEXT: vrsub.vi v16, v8, 0 -; RV32D-NEXT: vand.vv v16, v8, v16 +; RV32D-NEXT: vand.vv v8, v8, v16 ; RV32D-NEXT: fsrmi a0, 1 -; RV32D-NEXT: vfcvt.f.xu.v v16, v16 +; RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; RV32D-NEXT: li a1, 52 -; RV32D-NEXT: vsrl.vx v16, v16, a1 +; RV32D-NEXT: vsrl.vx v8, v8, a1 ; RV32D-NEXT: li a1, 1023 -; RV32D-NEXT: vsub.vx v16, v16, a1 -; RV32D-NEXT: vmseq.vx v0, v8, zero +; RV32D-NEXT: vsub.vx v8, v8, a1 ; RV32D-NEXT: li a1, 64 -; RV32D-NEXT: vmerge.vxm v8, v16, a1, v0 +; RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; RV32D-NEXT: fsrm a0 ; RV32D-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 5db18d2cd8763..ba8486780197e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -154,9 +154,9 @@ define i1 @extractelt_nxv128i1(* %x, i64 %idx) nounwind { ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, 
v16, 1, v0 ; RV32-NEXT: vs8r.v v24, (a3) +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 -; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 @@ -194,9 +194,9 @@ define i1 @extractelt_nxv128i1(* %x, i64 %idx) nounwind { ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 ; RV64-NEXT: vs8r.v v24, (a3) +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 -; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll index bc2e6413f0661..5cbfeb06edf5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -599,10 +599,10 @@ define i64 @extractelt_nxv1i64_imm( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v9, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v9 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -640,10 +640,10 @@ define i64 @extractelt_nxv2i64_imm( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v10, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v10 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -681,10 +681,10 @@ define i64 @extractelt_nxv4i64_imm( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v12, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v12 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -722,10 +722,10 @@ define i64 @extractelt_nxv8i64_imm( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v16, v8, a0 +; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -957,9 +957,9 @@ define i64 @extractelt_nxv16i64_0( %v) { ; CHECK-LABEL: extractelt_nxv16i64_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v8, 1 -; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 0 ret i64 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll index 30c8216dac025..100702e492459 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -146,9 +146,9 @@ define i64 @bitcast_v8i8_i64(<8 x i8> %a) { ; RV32ELEN32-LABEL: bitcast_v8i8_i64: ; RV32ELEN32: # %bb.0: ; RV32ELEN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ELEN32-NEXT: vslidedown.vi v9, v8, 1 -; 
RV32ELEN32-NEXT: vmv.x.s a1, v9 ; RV32ELEN32-NEXT: vmv.x.s a0, v8 +; RV32ELEN32-NEXT: vslidedown.vi v8, v8, 1 +; RV32ELEN32-NEXT: vmv.x.s a1, v8 ; RV32ELEN32-NEXT: ret ; ; RV64ELEN32-LABEL: bitcast_v8i8_i64: @@ -184,9 +184,9 @@ define i64 @bitcast_v4i16_i64(<4 x i16> %a) { ; RV32ELEN32-LABEL: bitcast_v4i16_i64: ; RV32ELEN32: # %bb.0: ; RV32ELEN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ELEN32-NEXT: vslidedown.vi v9, v8, 1 -; RV32ELEN32-NEXT: vmv.x.s a1, v9 ; RV32ELEN32-NEXT: vmv.x.s a0, v8 +; RV32ELEN32-NEXT: vslidedown.vi v8, v8, 1 +; RV32ELEN32-NEXT: vmv.x.s a1, v8 ; RV32ELEN32-NEXT: ret ; ; RV64ELEN32-LABEL: bitcast_v4i16_i64: @@ -222,9 +222,9 @@ define i64 @bitcast_v2i32_i64(<2 x i32> %a) { ; RV32ELEN32-LABEL: bitcast_v2i32_i64: ; RV32ELEN32: # %bb.0: ; RV32ELEN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ELEN32-NEXT: vslidedown.vi v9, v8, 1 -; RV32ELEN32-NEXT: vmv.x.s a1, v9 ; RV32ELEN32-NEXT: vmv.x.s a0, v8 +; RV32ELEN32-NEXT: vslidedown.vi v8, v8, 1 +; RV32ELEN32-NEXT: vmv.x.s a1, v8 ; RV32ELEN32-NEXT: ret ; ; RV64ELEN32-LABEL: bitcast_v2i32_i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 452b42f321ae5..dfb5213e460fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1396,9 +1396,10 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1530,9 +1531,10 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -1663,31 +1665,31 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v14, a4, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: li a5, 85 +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: li a4, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v18, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v16, v16, v18, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v18, v18, a4, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t ; 
RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a4, v0.t +; RV32-NEXT: vsll.vi v18, v18, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v18, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t @@ -1790,39 +1792,39 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a5, v0 +; RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v14, v16, v14 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -1934,35 +1936,35 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v16, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v20, a4, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t -; RV32-NEXT: lui a5, 5 -; RV32-NEXT: addi a5, a5, 1365 +; RV32-NEXT: vor.vv v20, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; 
RV32-NEXT: lui a4, 5 +; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v28, v28, v20, v0.t -; RV32-NEXT: vor.vv v24, v28, v24, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v28, v28, a4, v0.t +; RV32-NEXT: vor.vv v24, v24, v28, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t ; RV32-NEXT: vand.vx v28, v8, a4, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v20, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -2063,40 +2065,40 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: lui a5, 5 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v20, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 +; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v20, v24, v20 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, 
v12 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -2202,36 +2204,54 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vx v24, v8, a3, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a5, a1, -256 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: addi a4, a1, -256 +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsll.vx v24, v24, a5, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2240,103 +2260,189 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 
24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v8, a5, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi 
a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; 
RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2441,37 +2547,37 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a6 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a6 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a6 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -2582,36 +2688,54 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vx v24, v8, a3, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a5, a1, -256 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; 
RV32-NEXT: addi a4, a1, -256 +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsll.vx v24, v24, a5, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2620,103 +2744,189 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v8, a5, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: csrr a3, vlenb 
; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: 
addi a3, a3, 819 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2821,37 +3031,37 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; 
RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a6 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a6 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a6 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index f1b00e3986400..1dd84b1ea8178 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -95,10 +95,10 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) { ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vand.vx v10, v8, a1 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -134,10 +134,10 @@ define void @bitreverse_v4i32(ptr %x, ptr %y) { ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 24 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a1, 61681 @@ -183,38 +183,38 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v9 -; RV32-NEXT: vsrl.vi v11, v8, 24 -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: vand.vx v11, v11, a1 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; 
RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: lui a4, 16 -; RV32-NEXT: addi a4, a4, -256 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vv v11, v11, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: vsll.vi v9, v9, 8 -; RV32-NEXT: vand.vx v11, v8, a1 -; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vor.vv v11, v11, v12 ; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vx v8, v8, a3 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -518,10 +518,10 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 @@ -557,10 +557,10 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: lui a1, 61681 @@ -598,10 +598,10 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV32-NEXT: lui a3, 61681 @@ -628,10 +628,10 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v9, 
v9, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 @@ -664,10 +664,10 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: lui a3, 61681 @@ -694,10 +694,10 @@ define void @bitreverse_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 @@ -746,31 +746,31 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 ; LMULMAX2-RV32-NEXT: li a4, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV32-NEXT: lui a4, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v12 ; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 24 ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a1 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 8 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 @@ -870,40 +870,40 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV32-LABEL: bitreverse_v4i64: ; LMULMAX1-RV32: # %bb.0: ; 
LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV32-NEXT: li a2, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v9, v10, a2 +; LMULMAX1-RV32-NEXT: li a3, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v10, a3 +; LMULMAX1-RV32-NEXT: lui a4, 16 +; LMULMAX1-RV32-NEXT: addi a4, a4, -256 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v9 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a2, v0 +; LMULMAX1-RV32-NEXT: lui a5, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a5, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v10, 24 -; LMULMAX1-RV32-NEXT: lui a2, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: li a3, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v10, a3 -; LMULMAX1-RV32-NEXT: li a4, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v10, a4 -; LMULMAX1-RV32-NEXT: lui a5, 16 -; LMULMAX1-RV32-NEXT: addi a5, a5, -256 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v10, 8 +; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v10, 24 +; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vand.vv v12, v10, v9 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v10, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v10, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vand.vv v13, v10, v9 +; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 8 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v12 +; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 24 +; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v13 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 4 ; LMULMAX1-RV32-NEXT: lui a6, 61681 @@ -935,26 +935,26 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 ; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v10 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v15 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a2 ; LMULMAX1-RV32-NEXT: vsrl.vx v15, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vx v16, v8, a4 +; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a4 +; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 8 +; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v16, v8, 24 ; LMULMAX1-RV32-NEXT: vand.vx v16, v16, a5 
-; LMULMAX1-RV32-NEXT: vor.vv v15, v16, v15 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v15 +; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 +; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a2 +; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v16, v16, a3 +; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 ; LMULMAX1-RV32-NEXT: vand.vv v9, v8, v9 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v15, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v15, v15, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v15, v9 -; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a3 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a4 -; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 +; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 70b10724d2492..e20a36e184725 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -436,9 +436,10 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -513,9 +514,10 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v8, a4 ; RV32-NEXT: vsll.vi v10, v10, 24 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma @@ -589,31 +591,31 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v14, a4, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: li a5, 85 +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: li a4, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v18, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v16, v16, v18, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v18, v18, a0, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: 
vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a0, v0.t +; RV32-NEXT: vsll.vi v18, v18, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v18, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: ret @@ -659,39 +661,39 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v10, v12 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a5, v0 +; RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v14, v14, v16 -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vand.vx v14, v8, a4 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v16, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v14, v16, v14 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v4i64_unmasked: @@ -746,35 +748,35 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v16, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v20, a4, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t -; RV32-NEXT: lui a5, 5 -; RV32-NEXT: addi a5, a5, 1365 +; RV32-NEXT: vor.vv v20, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: lui a4, 5 +; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, 
e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v28, v28, v20, v0.t -; RV32-NEXT: vor.vv v24, v28, v24, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v28, v28, a0, v0.t +; RV32-NEXT: vor.vv v24, v24, v28, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t -; RV32-NEXT: vand.vx v28, v8, a4, v0.t +; RV32-NEXT: vand.vx v28, v8, a0, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v20, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v8i64: @@ -818,40 +820,40 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: lui a5, 5 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v20, 0 ; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 +; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v24 -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vx v20, v20, a2 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vand.vv v24, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a3 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vand.vv v20, v24, v20 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v8i64_unmasked: @@ -900,36 +902,54 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 
x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: lui a6, 349525 ; RV32-NEXT: addi a6, a6, 1365 @@ -938,75 +958,71 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a6 ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv 
v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1084,37 +1100,37 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, 
v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a0 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -1170,36 +1186,54 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: 
vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma ; RV32-NEXT: lui a6, 349525 ; RV32-NEXT: addi a6, a6, 1365 @@ -1208,75 +1242,71 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: lui a7, 1044480 ; RV32-NEXT: vmv.v.x v0, a6 ; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; 
RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1354,37 +1384,37 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: li a4, 32 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: lui a5, 349525 ; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a6, 1044480 ; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a0 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vand.vx v8, v8, a0 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index e393fef62a251..7006409c6eb87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -43,10 +43,10 @@ define void @bswap_v4i32(ptr %x, ptr %y) { ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vand.vx v10, v8, a1 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret @@ -61,10 +61,10 @@ define void @bswap_v4i32(ptr %x, ptr %y) { ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 24 -; RV64-NEXT: vand.vx v8, v8, a1 -; 
RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret @@ -89,38 +89,38 @@ define void @bswap_v2i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vand.vv v10, v10, v9 -; RV32-NEXT: vsrl.vi v11, v8, 24 -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: vand.vx v11, v11, a1 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: lui a4, 16 -; RV32-NEXT: addi a4, a4, -256 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vv v11, v11, v10 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: vsll.vi v9, v9, 8 -; RV32-NEXT: vand.vx v11, v8, a1 -; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vor.vv v11, v11, v12 ; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vx v8, v8, a3 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -253,10 +253,10 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -271,10 +271,10 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV64-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: 
vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -291,19 +291,19 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) @@ -321,19 +321,19 @@ define void @bswap_v8i32(ptr %x, ptr %y) { ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 -; LMULMAX1-RV64-NEXT: vor.vv v9, v11, v9 +; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) @@ -367,31 +367,31 @@ define void @bswap_v4i64(ptr %x, ptr %y) { ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 ; LMULMAX2-RV32-NEXT: li a4, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV32-NEXT: lui a4, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v12 ; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 24 ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; 
LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a1 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 8 +; LMULMAX2-RV32-NEXT: vor.vv v14, v14, v16 +; LMULMAX2-RV32-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -434,64 +434,64 @@ define void @bswap_v4i64(ptr %x, ptr %y) { ; LMULMAX1-RV32-LABEL: bswap_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) +; LMULMAX1-RV32-NEXT: li a2, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV32-NEXT: li a3, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a3 +; LMULMAX1-RV32-NEXT: lui a4, 16 +; LMULMAX1-RV32-NEXT: addi a4, a4, -256 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 +; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 +; LMULMAX1-RV32-NEXT: lui a5, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 24 -; LMULMAX1-RV32-NEXT: lui a2, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: li a3, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v9, a3 -; LMULMAX1-RV32-NEXT: li a4, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v9, a4 -; LMULMAX1-RV32-NEXT: lui a5, 16 -; LMULMAX1-RV32-NEXT: addi a5, a5, -256 -; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v9, a3 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a4 -; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vsrl.vx v12, v8, a3 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a4 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 8 +; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 24 +; LMULMAX1-RV32-NEXT: lui a5, 4080 ; 
LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 -; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 8 -; LMULMAX1-RV32-NEXT: vand.vx v12, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 24 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 ; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a3 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vand.vv v13, v8, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 8 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a4 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v13 ; LMULMAX1-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v9, a3 +; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a4 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 8 +; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 24 +; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 +; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 +; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vor.vv v9, v12, v9 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 +; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bswap_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll index 1c2efcaae560f..768a00e39ba7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -412,9 +412,9 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3 ; LMULMAX4-NEXT: addi s0, sp, 256 ; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 ; LMULMAX4-NEXT: andi sp, sp, -128 +; LMULMAX4-NEXT: addi a0, sp, 64 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: addi a0, sp, 64 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: mv a0, sp ; LMULMAX4-NEXT: li a1, 1 @@ -516,9 +516,9 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> ; LMULMAX4-NEXT: sd a0, 136(sp) ; LMULMAX4-NEXT: li a0, 13 ; LMULMAX4-NEXT: sd a0, 0(sp) +; LMULMAX4-NEXT: addi a0, sp, 72 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vmv.v.i v8, 0 -; LMULMAX4-NEXT: addi a0, sp, 72 ; LMULMAX4-NEXT: vse32.v v8, (a0) ; LMULMAX4-NEXT: addi a0, sp, 8 ; LMULMAX4-NEXT: li a1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index f1a87318d25dc..fb8e2429b3a39 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -567,15 +567,16 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_v2i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 ; LMULMAX2-RV32F-NEXT: li a1, 64 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -587,12 +588,12 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: li a1, 64 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1263,15 +1264,16 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_v4i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 ; LMULMAX2-RV32F-NEXT: li a1, 64 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -1283,16 +1285,16 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: li a1, 64 ; LMULMAX2-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV64F-NEXT: vminu.vx v8, v12, a1 +; LMULMAX2-RV64F-NEXT: vminu.vx v8, v10, a1 ; LMULMAX2-RV64F-NEXT: 
vse64.v v8, (a0) ; LMULMAX2-RV64F-NEXT: ret ; @@ -1887,15 +1889,16 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v2i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v9, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v9, v9, v8 ; LMULMAX2-RV32F-NEXT: vse64.v v9, (a0) ; LMULMAX2-RV32F-NEXT: ret @@ -1904,12 +1907,12 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v9, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 ; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 ; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV64F-NEXT: ret @@ -2553,15 +2556,16 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; ; LMULMAX2-RV32F-LABEL: ctlz_zero_undef_v4i64: ; LMULMAX2-RV32F: # %bb.0: -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: fsrm a1 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: li a1, 190 +; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v8 -; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v12, 23 ; LMULMAX2-RV32F-NEXT: vwsubu.wv v10, v10, v8 ; LMULMAX2-RV32F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV32F-NEXT: ret @@ -2570,14 +2574,14 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV64F: # %bb.0: ; LMULMAX2-RV64F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV64F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64F-NEXT: li a1, 190 -; LMULMAX2-RV64F-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64F-NEXT: fsrmi a1, 1 -; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; LMULMAX2-RV64F-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX2-RV64F-NEXT: fsrm a1 -; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v11, 23 -; LMULMAX2-RV64F-NEXT: vwsubu.vv v12, v10, v8 -; LMULMAX2-RV64F-NEXT: vse64.v v12, (a0) +; LMULMAX2-RV64F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV64F-NEXT: li a1, 190 +; LMULMAX2-RV64F-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV64F-NEXT: vwsubu.vv v10, v9, v8 +; LMULMAX2-RV64F-NEXT: vse64.v v10, (a0) ; LMULMAX2-RV64F-NEXT: ret ; ; LMULMAX2-RV32D-LABEL: ctlz_zero_undef_v4i64: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index fa6269b443a6d..71a621c10257d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1871,110 +1871,104 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a2, a1, 1365 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a2, a2, a4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB34_2: -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: 
mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v8, v0.t +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr 
a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t @@ -1999,7 +1993,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2015,7 +2009,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2028,7 +2023,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2043,8 +2039,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2158,65 +2153,72 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a2, a1, 1365 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: li a3, 16 -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB35_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 -; RV32-NEXT: .LBB35_2: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: sub sp, sp, a3 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vmv.v.x v0, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: li a2, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB35_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB35_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a2, a2, a4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v24, v24, v8 ; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vsrl.vx v8, v24, a1 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 @@ -2227,15 +2229,20 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vsub.vv v24, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v24, v8, v24 ; RV32-NEXT: vand.vv v8, v24, v0 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v24, v24, v0 @@ -2258,7 +2265,8 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index b70345fe89cc0..146091a1ff6ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -2133,32 +2133,47 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb -; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a1 +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a1, .LBB34_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB34_2: +; RV32-NEXT: li a1, 1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 56 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: li a1, 1 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -2176,27 +2191,27 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; 
RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 56 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -2207,6 +2222,12 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 @@ -2227,12 +2248,6 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -2246,33 +2261,36 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 48 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a3, .LBB34_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB34_2: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; 
RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb @@ -2281,23 +2299,26 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2309,50 +2330,31 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 @@ -2371,7 +2373,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, 
v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2382,9 +2385,12 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 @@ -2502,23 +2508,24 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v8, v16 -; RV32-NEXT: li a3, 1 -; RV32-NEXT: vsub.vx v8, v8, a3 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 @@ -2531,7 +2538,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 @@ -2542,7 +2549,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -2553,23 +2560,23 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # 
Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -2577,7 +2584,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a3 +; RV32-NEXT: vsub.vx v0, v0, a2 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: csrr a0, vlenb @@ -4778,32 +4785,47 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb -; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a2, a2, a1 +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a1, .LBB70_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB70_2: +; RV32-NEXT: li a1, 1 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 56 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: li a1, 1 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -4821,27 +4843,27 @@ define <32 x i64> 
@vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 56 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 ; RV32-NEXT: mul a4, a4, a5 @@ -4852,6 +4874,12 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a4, a4, 819 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 48 @@ -4872,12 +4900,6 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -4891,33 +4913,36 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a4, a4, -241 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 48 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; 
RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a3, .LBB70_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB70_2: +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb @@ -4926,23 +4951,26 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsub.vx v16, v16, a1, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vxor.vv v16, v16, v8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -4954,50 +4982,31 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; 
RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 @@ -5016,7 +5025,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5027,9 +5037,12 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: add sp, sp, a0 @@ -5147,23 +5160,24 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v8, v16 -; RV32-NEXT: li a3, 1 -; RV32-NEXT: vsub.vx v8, v8, a3 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vsub.vx v16, v8, a2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 24 @@ -5176,7 +5190,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a4, 209715 ; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a4 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 @@ -5187,7 +5201,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: lui a4, 61681 ; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 @@ -5198,23 +5212,23 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 
zeroex ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: lui a4, 4112 ; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, a0, -16 -; RV32-NEXT: sltu a0, a0, a2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, a0, -16 +; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 @@ -5222,7 +5236,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a3 +; RV32-NEXT: vsub.vx v0, v0, a2 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index de89cb36373fe..c97163ca1669a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -548,20 +548,20 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vsub.vv v10, v9, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10 +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32F-NEXT: vsub.vv v9, v9, v8 +; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v11, v10 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v11, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v11, v10 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v9, v8 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v10, v11, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32F-NEXT: vsub.vx v8, v9, a1 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -592,18 +592,18 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32D-NEXT: vmv.v.i v9, 0 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vsub.vv v10, v9, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10 +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32D-NEXT: vsub.vv v9, v9, v8 +; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 ; 
LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -633,18 +633,18 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.i v9, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vsub.vv v10, v9, v8 -; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v10 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX8-RV32-NEXT: vsub.vv v9, v9, v8 +; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 ; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v10, v10 +; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 ; LMULMAX8-RV32-NEXT: fsrm a1 ; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v10, v10, a1 +; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 +; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; @@ -1232,20 +1232,20 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vsub.vv v12, v10, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32F-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v14, v12 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v12, v14, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v14, v12 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v8 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v12, v14, a1 -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32F-NEXT: vsub.vx v8, v10, a1 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -1276,18 +1276,18 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vsub.vv v12, v10, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v12, v8, v12 +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32D-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v12, v12 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v 
v8, v8 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v12, v12, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v12, v12, a1 -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -1317,18 +1317,18 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vsub.vv v12, v10, v8 -; LMULMAX8-RV32-NEXT: vand.vv v12, v8, v12 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX8-RV32-NEXT: vsub.vv v10, v10, v8 +; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v12, v12 +; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 ; LMULMAX8-RV32-NEXT: fsrm a1 ; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v12, v12, a1 +; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v12, v12, a1 -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 +; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 ; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v12, a1, v0 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index dd4467c629615..dd9f5874fd66d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -48,11 +48,11 @@ define i64 @extractelt_v2i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v2i64: @@ -154,10 +154,10 @@ define i64 @extractelt_v4i64(ptr %x) nounwind { ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v10, v8, a0 +; RV32-NEXT: vmv.x.s a1, v10 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v4i64: @@ -803,14 +803,14 @@ define i32 @extractelt_udiv_v4i32(<4 x i32> %x) { ; ; RV64M-LABEL: extractelt_udiv_v4i32: ; RV64M: # %bb.0: +; RV64M-NEXT: lui a0, 322639 +; RV64M-NEXT: addiw a0, a0, -945 +; RV64M-NEXT: slli a0, a0, 32 ; RV64M-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64M-NEXT: vslidedown.vi v8, v8, 2 -; RV64M-NEXT: vmv.x.s a0, v8 -; RV64M-NEXT: slli a0, a0, 32 -; RV64M-NEXT: lui a1, 322639 -; RV64M-NEXT: addiw a1, a1, -945 +; RV64M-NEXT: vmv.x.s a1, v8 ; RV64M-NEXT: slli a1, a1, 32 -; RV64M-NEXT: mulhu a0, a0, a1 +; RV64M-NEXT: mulhu a0, a1, a0 ; RV64M-NEXT: srli a0, a0, 34 ; RV64M-NEXT: ret %bo = udiv <4 x i32> %x, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index 48aafd7ab148d..5589dfdad7839 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -235,12 +235,12 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v8, v8 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v8, v11, v8 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %a, %a %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b) @@ -253,12 +253,12 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v9, v9 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v9, v8, v8 -; CHECK-NEXT: vmerge.vvm v11, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v9, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v8, v11, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %b, %b %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll index 895875518712e..ee5858504b93b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll @@ -9,8 +9,7 @@ define <2 x double> @foo(<2 x double> %x, <2 x double> %y) { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v9 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 [[DEF]], [[COPY1]], [[COPY]], 7, 2, 6 /* e64 */, 1 /* ta, mu */ + ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 $noreg, [[COPY1]], [[COPY]], 7, 2, 6 /* e64 */, 1 /* ta, mu */, implicit $frm ; CHECK-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %1 = fadd fast <2 x double> %x, %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index a019a187f0a6e..b2d61457276a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -235,12 +235,12 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v8, v8 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v8, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmfeq.vv v8, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; 
CHECK-NEXT: vfmin.vv v8, v11, v8 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %a, %a %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b) @@ -253,12 +253,12 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfadd.vv v10, v9, v9 -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vmfeq.vv v9, v8, v8 -; CHECK-NEXT: vmerge.vvm v11, v10, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v9, v10, v10 +; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v8, v11, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret %c = fadd nnan <2 x half> %b, %b %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll index a63f7d289141d..70cb424166748 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -160,8 +160,8 @@ define void @splat_zero_16f16(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x half> poison, half 0.0, i32 0 @@ -182,8 +182,8 @@ define void @splat_zero_v8f32(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x float> poison, float 0.0, i32 0 @@ -204,8 +204,8 @@ define void @splat_zero_v4f64(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse64.v v8, (a1) +; LMULMAX1-NEXT: vse64.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse64.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <4 x double> poison, double 0.0, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index 3428425ba1421..957d2a4b9205a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -77,8 +77,8 @@ define void @fp2si_v2f32_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -95,8 +95,8 @@ define void @fp2ui_v2f32_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -114,8 +114,8 @@ define void @fp2si_v8f32_v8i64(ptr %x, ptr 
%y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -133,8 +133,8 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) @@ -151,13 +151,12 @@ define void @fp2si_v2f16_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, ptr %x @@ -172,13 +171,12 @@ define void @fp2ui_v2f16_v2i64(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, ptr %x @@ -663,12 +661,11 @@ declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> %a) define void @fp2si_v2f64_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2si_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret @@ -682,12 +679,11 @@ declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>) define void @fp2ui_v2f64_v2i32(ptr %x, ptr %y) { ; CHECK-LABEL: fp2ui_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll index 1acbaff0cf966..b6a883e49f5c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -94,25 +94,25 @@ declare <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float>, <32 x i1>, i32) define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfwcvt.f.f.v v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll index ea980db61ae1d..8b3136fc40c6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -316,23 +316,23 @@ declare <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll index 37b7fcedcdfa0..1fd37301b88fb 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -316,23 +316,23 @@ declare <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index 432a919c452d8..068ec85e6e5cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -94,27 +94,27 @@ declare <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double>, <32 x i1>, i3 define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v12, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v8, v24, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vfncvt.f.f.w v24, v16, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: ret %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl) ret <32 x float> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll index 671531c70330c..5f1cc2d4b4e96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -802,24 +802,24 @@ define <16 x i64> @fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v16, v8, 1, v0.t -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vand.vx v8, v24, a0, v0.t +; RV32-NEXT: vsrl.vv v16, v16, v8, v0.t +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vxor.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 63 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsll.vv v8, v16, v8, v0.t -; RV32-NEXT: vand.vx v16, v24, a0, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV32-NEXT: vsll.vi v24, v24, 1, v0.t +; RV32-NEXT: vsll.vv v8, v24, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 @@ -835,20 +835,20 @@ define <16 x i64> @fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v16, v8, 1, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV64-NEXT: li a0, 63 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v24, a0, v0.t +; RV64-NEXT: vsrl.vv v16, v16, v8, v0.t ; RV64-NEXT: vnot.v v8, v24, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsll.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v24, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV64-NEXT: vsll.vi v24, v24, 1, v0.t +; RV64-NEXT: vsll.vv v8, v24, v8, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -869,25 +869,26 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v16, v8 +; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, 
a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, -1 +; RV32-NEXT: vand.vx v8, v24, a0, v0.t +; RV32-NEXT: vsll.vv v8, v16, v8, v0.t +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v16, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: vsrl.vv v8, v16, v8, v0.t -; RV32-NEXT: vand.vx v16, v24, a0, v0.t +; RV32-NEXT: vxor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vx v16, v16, a0, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsll.vv v16, v24, v16, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 1, v0.t +; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -902,21 +903,22 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v16, 1, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-NEXT: vmv8r.v v16, v8 ; RV64-NEXT: li a0, 63 -; RV64-NEXT: vnot.v v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vsrl.vv v8, v16, v8, v0.t -; RV64-NEXT: vand.vx v16, v24, a0, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vand.vx v8, v24, a0, v0.t +; RV64-NEXT: vsll.vv v8, v16, v8, v0.t +; RV64-NEXT: vnot.v v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsll.vv v16, v24, v16, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v24, 1, v0.t +; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index 922750223a1cd..42bc54da09a0b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -22,9 +22,9 @@ define <1 x i1> @insertelt_idx_v1i1(<1 x i1> %x, i1 %elt, i32 zeroext %idx) noun ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -55,9 +55,9 @@ define <2 x i1> @insertelt_idx_v2i1(<2 x i1> %x, i1 %elt, i32 zeroext %idx) noun ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: 
addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -90,9 +90,9 @@ define <8 x i1> @insertelt_idx_v8i1(<8 x i1> %x, i1 %elt, i32 zeroext %idx) noun ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -106,15 +106,14 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind { ; CHECK-LABEL: insertelt_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma -; CHECK-NEXT: vslideup.vi v12, v8, 1 +; CHECK-NEXT: vslideup.vi v8, v12, 1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i64 1 @@ -125,16 +124,15 @@ define <64 x i1> @insertelt_idx_v64i1(<64 x i1> %x, i1 %elt, i32 zeroext %idx) n ; CHECK-LABEL: insertelt_idx_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma -; CHECK-NEXT: vslideup.vx v12, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v12, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <64 x i1> %x, i1 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index bef9d34eabf20..14adf92652640 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -422,10 +422,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: li a0, 67 -; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -470,14 +470,14 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i2we4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmv.v.i v10, 4 ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, 
e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v11, v8, 2 +; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.v.x v0, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -488,38 +488,38 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8256 -; RV32-NEXT: addi a0, a0, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vrgather.vv v10, v8, v11 -; RV32-NEXT: vmv.v.i v8, 6 +; RV32-NEXT: vmv.v.i v10, 6 ; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV32-NEXT: vslideup.vi v11, v8, 5 +; RV32-NEXT: vslideup.vi v11, v10, 5 +; RV32-NEXT: lui a0, 8256 +; RV32-NEXT: addi a0, a0, 2 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: li a0, 98 ; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vrgather.vv v10, v8, v12 ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8256 -; RV64-NEXT: addiw a0, a0, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.v.x v11, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vrgather.vv v10, v8, v11 -; RV64-NEXT: vmv.v.i v8, 6 +; RV64-NEXT: vmv.v.i v10, 6 ; RV64-NEXT: vmv.v.i v11, 0 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vi v11, v8, 5 +; RV64-NEXT: vslideup.vi v11, v10, 5 +; RV64-NEXT: lui a0, 8256 +; RV64-NEXT: addiw a0, a0, 2 +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: li a0, 98 ; RV64-NEXT: vmv.v.x v0, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll index 7e092ae0a7574..c8c2aea4b4ebb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -339,8 +339,8 @@ define void @splat_zero_v32i8(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a1) +; LMULMAX1-NEXT: vse8.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <32 x i8> poison, i8 0, i32 0 @@ -368,8 +368,8 @@ define void @splat_zero_v16i16(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x i16> poison, i16 0, i32 0 @@ -397,8 +397,8 @@ define void @splat_zero_v8i32(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli 
zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x i32> poison, i32 0, i32 0 @@ -426,8 +426,8 @@ define void @splat_zero_v4i64(ptr %x) { ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: addi a0, a0, 16 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -435,8 +435,8 @@ define void @splat_zero_v4i64(ptr %x) { ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: addi a0, a0, 16 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 0, i32 0 @@ -632,8 +632,8 @@ define void @splat_allones_v32i8(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse8.v v8, (a1) +; LMULMAX1-NEXT: vse8.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse8.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <32 x i8> poison, i8 -1, i32 0 @@ -661,8 +661,8 @@ define void @splat_allones_v16i16(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: vse16.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <16 x i16> poison, i16 -1, i32 0 @@ -690,8 +690,8 @@ define void @splat_allones_v8i32(ptr %x) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, -1 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: vse32.v v8, (a0) +; LMULMAX1-NEXT: addi a0, a0, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: ret %a = insertelement <8 x i32> poison, i32 -1, i32 0 @@ -719,8 +719,8 @@ define void @splat_allones_v4i64(ptr %x) { ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: addi a0, a0, 16 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: ret ; @@ -728,8 +728,8 @@ define void @splat_allones_v4i64(ptr %x) { ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v8, -1 -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV64-NEXT: addi a0, a0, 16 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i64> poison, i64 -1, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 4907e2c71b22c..79e589a5a5e01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1105,6 +1105,14 @@ define void 
@mulhu_v16i8(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: lui a1, 3 +; RV32-NEXT: addi a1, a1, -2044 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a1 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, -128 +; RV32-NEXT: vmerge.vxm v10, v9, a1, v0 ; RV32-NEXT: lui a1, 1 ; RV32-NEXT: addi a2, a1, 32 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -1112,19 +1120,11 @@ define void @mulhu_v16i8(ptr %x) { ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: lui a2, %hi(.LCPI65_0) ; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV32-NEXT: vle8.v v9, (a2) -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vmerge.vim v11, v10, 1, v0 -; RV32-NEXT: vsrl.vv v11, v8, v11 -; RV32-NEXT: vmulhu.vv v9, v11, v9 +; RV32-NEXT: vle8.v v11, (a2) +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsrl.vv v9, v8, v9 +; RV32-NEXT: vmulhu.vv v9, v9, v11 ; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: lui a2, 3 -; RV32-NEXT: addi a2, a2, -2044 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a2 -; RV32-NEXT: li a2, -128 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vxm v10, v10, a2, v0 ; RV32-NEXT: vmulhu.vv v8, v8, v10 ; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: li a2, 513 @@ -1152,6 +1152,14 @@ define void @mulhu_v16i8(ptr %x) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: lui a1, 3 +; RV64-NEXT: addiw a1, a1, -2044 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: li a1, -128 +; RV64-NEXT: vmerge.vxm v10, v9, a1, v0 ; RV64-NEXT: lui a1, 1 ; RV64-NEXT: addiw a2, a1, 32 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -1159,19 +1167,11 @@ define void @mulhu_v16i8(ptr %x) { ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: lui a2, %hi(.LCPI65_0) ; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0) -; RV64-NEXT: vle8.v v9, (a2) -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vmerge.vim v11, v10, 1, v0 -; RV64-NEXT: vsrl.vv v11, v8, v11 -; RV64-NEXT: vmulhu.vv v9, v11, v9 +; RV64-NEXT: vle8.v v11, (a2) +; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: vsrl.vv v9, v8, v9 +; RV64-NEXT: vmulhu.vv v9, v9, v11 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: lui a2, 3 -; RV64-NEXT: addiw a2, a2, -2044 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a2 -; RV64-NEXT: li a2, -128 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vxm v10, v10, a2, v0 ; RV64-NEXT: vmulhu.vv v8, v8, v10 ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: li a2, 513 @@ -1205,32 +1205,32 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, 1 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vmv1r.v v11, v10 -; CHECK-NEXT: vslideup.vi v11, v9, 6 +; CHECK-NEXT: vslideup.vi v9, v11, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vsrl.vv v11, v8, v11 -; 
CHECK-NEXT: vmulhu.vv v11, v11, v12 -; CHECK-NEXT: vsub.vv v8, v8, v11 -; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vv v9, v8, v9 +; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vmulhu.vv v8, v8, v10 -; CHECK-NEXT: vadd.vv v8, v8, v11 +; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: li a1, 33 ; CHECK-NEXT: vmv.v.x v0, a1 -; CHECK-NEXT: vmv.v.i v10, 3 -; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 +; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v10, v9, 6 +; CHECK-NEXT: vslideup.vi v9, v11, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -1272,18 +1272,18 @@ define void @mulhu_v4i32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v10, v9, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI68_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmulhu.vv v9, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 2 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmulhu.vv v8, v8, v11 +; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vmv.v.i v9, 2 ; CHECK-NEXT: li a1, 1 @@ -1440,24 +1440,24 @@ define void @mulhs_v6i16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vmv.v.i v9, -7 -; CHECK-NEXT: vmerge.vim v9, v9, 7, v0 -; CHECK-NEXT: vdiv.vv v9, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 7 -; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: li a1, -14 -; CHECK-NEXT: vmadd.vx v11, a1, v10 +; CHECK-NEXT: vmadd.vx v10, a1, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vslidedown.vi v9, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vdiv.vv v8, v8, v11 +; CHECK-NEXT: vdiv.vv v9, v9, v10 +; CHECK-NEXT: vmv.v.i v0, 6 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -7 +; CHECK-NEXT: vmerge.vim v10, v10, 7, v0 +; CHECK-NEXT: vdiv.vv v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 4 +; CHECK-NEXT: vslideup.vi v8, v9, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = sdiv <6 x i16> %a, @@ -1525,16 +1525,16 @@ define void @mulhs_v2i64(ptr %x) { ; RV32-NEXT: vrsub.vi v10, v10, 0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmadd.vv v10, v8, v9 -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsrl.vx v8, v10, a1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 1 -; RV32-NEXT: 
vmv.v.i v11, 0 +; RV32-NEXT: vmv.v.i v8, 1 +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV32-NEXT: vslideup.vi v11, v9, 2 +; RV32-NEXT: vslideup.vi v9, v8, 2 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsra.vv v9, v10, v11 -; RV32-NEXT: vadd.vv v8, v9, v8 +; RV32-NEXT: vsra.vv v8, v10, v9 +; RV32-NEXT: li a1, 63 +; RV32-NEXT: vsrl.vx v9, v10, a1 +; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -4955,6 +4955,13 @@ define void @mulhu_v32i8(ptr %x) { ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV32-NEXT: lui a2, 163907 +; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV32-NEXT: li a2, -128 +; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV32-NEXT: lui a2, 66049 ; LMULMAX2-RV32-NEXT: addi a2, a2, 32 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -4962,20 +4969,13 @@ define void @mulhu_v32i8(ptr %x) { ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV32-NEXT: vle8.v v12, (a2) -; LMULMAX2-RV32-NEXT: vmerge.vim v14, v10, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v14, v12 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, -128 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vle8.v v14, (a2) +; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV32-NEXT: lui a2, 8208 ; LMULMAX2-RV32-NEXT: addi a2, a2, 513 @@ -5005,6 +5005,13 @@ define void @mulhu_v32i8(ptr %x) { ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV64-NEXT: lui a2, 163907 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV64-NEXT: li a2, -128 +; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV64-NEXT: lui a2, 66049 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -5012,20 +5019,13 @@ define void @mulhu_v32i8(ptr %x) { ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV64-NEXT: vle8.v v12, (a2) -; LMULMAX2-RV64-NEXT: vmerge.vim v14, v10, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v14, v8, v14 -; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v14, v12 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; 
LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, -128 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vle8.v v14, (a2) +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV64-NEXT: lui a2, 8208 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 @@ -5074,6 +5074,13 @@ define void @mulhu_v16i16(ptr %x) { ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) +; LMULMAX2-RV32-NEXT: li a1, 257 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1048568 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v12, a1, v0 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -5081,19 +5088,12 @@ define void @mulhu_v16i16(ptr %x) { ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV32-NEXT: vle16.v v12, (a1) -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v16, v14, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v16, v10, v16 -; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v12, v10, v12 +; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v12, v16 ; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV32-NEXT: lui a1, 1048568 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a1, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 ; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v12 ; LMULMAX2-RV32-NEXT: lui a1, 2 @@ -5113,6 +5113,13 @@ define void @mulhu_v16i16(ptr %x) { ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) +; LMULMAX2-RV64-NEXT: li a1, 257 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 +; LMULMAX2-RV64-NEXT: lui a1, 1048568 +; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v12, a1, v0 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma @@ -5120,19 +5127,12 @@ define void @mulhu_v16i16(ptr %x) { ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV64-NEXT: vle16.v v12, (a1) -; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v16, v14, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v16, 
v10, v16 -; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v16, v12 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v12, v10, v12 +; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v12, v16 ; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV64-NEXT: lui a1, 1048568 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v14, a1, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 ; LMULMAX2-RV64-NEXT: vadd.vv v10, v10, v12 ; LMULMAX2-RV64-NEXT: lui a1, 2 @@ -5173,18 +5173,18 @@ define void @mulhu_v8i32(ptr %x) { ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) -; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 68 ; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-NEXT: vmv.v.x v0, a1 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) +; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) +; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vmv.v.i v12, 0 ; LMULMAX2-NEXT: lui a1, 524288 ; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 136 @@ -5200,33 +5200,33 @@ define void @mulhu_v8i32(ptr %x) { ; LMULMAX1-RV32-LABEL: mulhu_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0) -; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV32-NEXT: vle32.v v10, (a0) -; LMULMAX1-RV32-NEXT: vmulhu.vv v11, v8, v9 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, 524288 -; LMULMAX1-RV32-NEXT: vmv.s.x v12, a2 -; LMULMAX1-RV32-NEXT: vmv.v.i v13, 0 +; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v13, v12, 2 +; LMULMAX1-RV32-NEXT: vslideup.vi v11, v10, 2 +; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0) +; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0) ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 2 +; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) +; LMULMAX1-RV32-NEXT: vmulhu.vv v12, v9, v10 +; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmv.v.i v12, 2 ; LMULMAX1-RV32-NEXT: li a2, 1 -; LMULMAX1-RV32-NEXT: vslide1down.vx v11, v11, a2 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsub.vv v10, v10, v9 -; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v10, v13 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV32-NEXT: vslide1down.vx v12, v12, a2 +; 
LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhu_v8i32: @@ -5283,24 +5283,24 @@ define void @mulhu_v4i64(ptr %x) { ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: li a1, -1 ; LMULMAX2-RV64-NEXT: slli a1, a1, 63 -; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; LMULMAX2-RV64-NEXT: vslideup.vi v14, v12, 2 +; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_1) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_1) -; LMULMAX2-RV64-NEXT: vle64.v v12, (a1) -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v14 +; LMULMAX2-RV64-NEXT: vle64.v v14, (a1) +; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v14 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5330,46 +5330,46 @@ define void @mulhu_v4i64(ptr %x) { ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI184_0) -; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI184_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI184_1)(a1) -; LMULMAX1-RV64-NEXT: addi a2, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV64-NEXT: li a2, -1 +; LMULMAX1-RV64-NEXT: slli a2, a2, 63 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_0) +; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI184_0) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsub.vv v10, v10, v9 -; LMULMAX1-RV64-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV64-NEXT: li a1, -1 -; LMULMAX1-RV64-NEXT: slli a1, a1, 63 +; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero +; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_1) +; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI184_1)(a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 +; 
LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 +; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vid.v v10 ; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: lui a1, 838861 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -819 -; LMULMAX1-RV64-NEXT: slli a3, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a3 -; LMULMAX1-RV64-NEXT: vmv.v.x v11, a1 -; LMULMAX1-RV64-NEXT: lui a1, 699051 -; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365 -; LMULMAX1-RV64-NEXT: slli a3, a1, 32 -; LMULMAX1-RV64-NEXT: add a1, a1, a3 +; LMULMAX1-RV64-NEXT: lui a2, 838861 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 +; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV64-NEXT: lui a2, 699051 +; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 +; LMULMAX1-RV64-NEXT: slli a3, a2, 32 +; LMULMAX1-RV64-NEXT: add a2, a2, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a1 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = udiv <4 x i64> %a, @@ -5383,18 +5383,18 @@ define void @mulhs_v32i8(ptr %x) { ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a2, -123 -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: lui a2, 304453 ; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, 57 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV32-NEXT: li a1, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32-NEXT: li a1, 57 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -5404,18 +5404,18 @@ define void @mulhs_v32i8(ptr %x) { ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a2, -123 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: lui a2, 304453 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, 57 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV64-NEXT: li a1, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV64-NEXT: li a1, 57 +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 ; 
LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -5658,16 +5658,16 @@ define void @mulhs_v4i64(ptr %x) { ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5 +; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a1, 349525 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 ; LMULMAX2-RV64-NEXT: slli a2, a1, 32 ; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_0) ; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 ; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a2, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll index f35f7dcdd9bef..9161cedd58e3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -7,13 +7,12 @@ define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr %p) { ; CHECK-LABEL: vector_interleave_store_v32i1_v16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index f0653dfc916a2..08a550f3eb448 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -16,10 +16,9 @@ define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vadd.vv v9, v8, v8 ; CHECK-NEXT: vrgather.vv v8, v10, v9 +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v10, 4 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t ; CHECK-NEXT: vadd.vi v11, v9, 1 @@ -656,15 +655,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 92 +; RV64-NEXT: li a3, 90 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xda, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 90 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 68 +; RV64-NEXT: li a3, 57 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 
; RV64-NEXT: addi a2, a2, 16 @@ -672,55 +671,62 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 84 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 6 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 73 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v8, v16, 4 ; RV64-NEXT: li a1, 128 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vmv.v.x v1, a1 +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v16, 8 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t ; RV64-NEXT: vmv.v.v v4, v8 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: li a1, 6 ; RV64-NEXT: vid.v v8 -; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: vmul.vx v16, v8, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 81 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 56 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 60 +; RV64-NEXT: li a3, 73 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vv v16, v24, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 76 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v8, -16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16 +; RV64-NEXT: vadd.vi v8, v16, -16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 44 +; RV64-NEXT: li a3, 41 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -728,123 +734,120 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; 
RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v16 +; RV64-NEXT: vmv.v.v v4, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 36 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v0, 5 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v8, 5 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v24, 3, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v8, 3, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v8, 1 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v16, 1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v0 -; RV64-NEXT: vadd.vi v24, v8, -15 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v0 +; RV64-NEXT: vadd.vi v8, v16, -15 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # 
Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v8, v24, 2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -853,396 +856,357 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: li a1, 24 ; RV64-NEXT: vadd.vi v8, v24, -14 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv.v.x v2, a1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.i v12, 6 +; RV64-NEXT: vmv.v.i v8, 6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v8, zero -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vslideup.vi v12, v8, 5 +; RV64-NEXT: vmv.s.x v4, zero +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vslideup.vi v8, v4, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, 
vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v20, v0, v12 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v12, v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 4, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: li a2, 25 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v8, 3 +; RV64-NEXT: vadd.vi v16, v8, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v0 -; RV64-NEXT: vmv.v.v v24, v16 -; RV64-NEXT: vadd.vi v16, v8, -13 +; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vmv.v.v v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v8, -13 +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 1 -; RV64-NEXT: vmv.v.i v20, 7 +; RV64-NEXT: vmv.v.i v12, 7 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 20 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill -; 
RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v16, a1 ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vslideup.vi v20, v8, 5 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vslideup.vi v12, v16, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v20 +; RV64-NEXT: vrgather.vv v16, v24, v12 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v8, 5, v0.t +; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v24, 5, v0.t -; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v8, 4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v20, v8, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: li a1, 192 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 28 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 68 +; RV64-NEXT: li a3, 81 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v24, 2 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v16, v8, -12 +; 
RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v20, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v24, 4 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v0 -; RV64-NEXT: li a1, 28 -; RV64-NEXT: vadd.vi v16, v24, -12 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v24, v4, 6 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: li a1, 192 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 57 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v28, v8, 2 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v12, v28 +; RV64-NEXT: vrgather.vv v12, v8, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: 
vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v8 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v4, v8, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v24, v0, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 +; RV64-NEXT: li a2, 73 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vadd.vi v24, v0, -11 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 52 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v4, v0.t -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 6 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v16, 5 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v0 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v24, v24, -11 +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; 
RV64-NEXT: vslideup.vi v24, v8, 6 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v8, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 12 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: vrgather.vv v12, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 20 +; RV64-NEXT: li a2, 81 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v8 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 36 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v16, (a0) +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: addi a1, a0, 320 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 40 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 5 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 24 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: li a3, 25 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: addi a1, a0, 64 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 41 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: addi a0, a0, 64 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 44 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size 
Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 92 +; RV64-NEXT: li a1, 90 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 5a8f5c52b42f7..600f497328c6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -361,9 +361,9 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) %ev = sext <2 x i8> %v to <2 x i64> @@ -422,11 +422,11 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a0, v9 -; RV64ZVE32F-NEXT: andi a1, a0, 255 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: andi a0, a0, 255 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: andi a1, a1, 255 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) %ev = zext <2 x i8> %v to <2 x i64> @@ -1079,9 +1079,9 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: ret %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru) %ev = sext <2 x i16> %v to <2 x i64> @@ -2145,9 +2145,9 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) %ev = sext <2 x i32> %v to <2 x i64> @@ -2204,13 +2204,13 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a0, v9 -; RV64ZVE32F-NEXT: slli a0, a0, 32 -; RV64ZVE32F-NEXT: srli a1, a0, 32 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 ; RV64ZVE32F-NEXT: slli a0, a0, 32 ; RV64ZVE32F-NEXT: srli a0, a0, 32 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s 
a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 32 +; RV64ZVE32F-NEXT: srli a1, a1, 32 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) %ev = zext <2 x i32> %v to <2 x i64> @@ -12355,26 +12355,24 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; ; RV64V-LABEL: mgather_baseidx_v32i8: ; RV64V: # %bb.0: -; RV64V-NEXT: vmv1r.v v12, v0 -; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v14, v8, 16 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v14 +; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vmv1r.v v12, v10 +; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v14, v10, 16 +; RV64V-NEXT: vslidedown.vi v10, v10, 16 +; RV64V-NEXT: vslidedown.vi v8, v8, 16 +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64V-NEXT: vluxei64.v v14, (a0), v16, v0.t -; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v8 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64V-NEXT: vmv1r.v v0, v12 ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64V-NEXT: vslideup.vi v10, v14, 16 -; RV64V-NEXT: vmv.v.v v8, v10 +; RV64V-NEXT: vslideup.vi v12, v10, 16 +; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index b86ef417c6a88..14c24cfaaf6fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -10850,11 +10850,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vslidedown.vi v10, v10, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 3453545c9adab..52d37d7f4b7af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -1793,20 +1793,20 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: lui a2, %hi(.LCPI72_0) -; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: lui a2, %hi(.LCPI72_0) +; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV32-NEXT: vle32.v v16, (a2) ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: vmsltu.vx v12, v16, a1 -; RV32-NEXT: vid.v v16 +; RV32-NEXT: vid.v v24 +; RV32-NEXT: 
vmsltu.vx v12, v24, a1 ; RV32-NEXT: vmsltu.vx v13, v16, a1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v13, v12, 4 +; RV32-NEXT: vslideup.vi v12, v13, 4 ; RV32-NEXT: li a0, 64 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-NEXT: vmand.mm v0, v13, v0 +; RV32-NEXT: vmand.mm v0, v12, v0 ; RV32-NEXT: vmv.v.i v12, 1 ; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: vslidedown.vx v12, v8, a3 @@ -1836,20 +1836,20 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: lui a2, %hi(.LCPI72_0) -; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: lui a2, %hi(.LCPI72_0) +; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0) ; RV64-NEXT: vle32.v v16, (a2) ; RV64-NEXT: mv a2, a0 -; RV64-NEXT: vmsltu.vx v12, v16, a1 -; RV64-NEXT: vid.v v16 +; RV64-NEXT: vid.v v24 +; RV64-NEXT: vmsltu.vx v12, v24, a1 ; RV64-NEXT: vmsltu.vx v13, v16, a1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v13, v12, 4 +; RV64-NEXT: vslideup.vi v12, v13, 4 ; RV64-NEXT: li a0, 64 ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-NEXT: vmand.mm v0, v13, v0 +; RV64-NEXT: vmand.mm v0, v12, v0 ; RV64-NEXT: vmv.v.i v12, 1 ; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: vslidedown.vx v12, v8, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 5004246fbba91..29bfb22cff594 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -945,10 +945,10 @@ define i64 @vreduce_add_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_add_v1i64: @@ -968,10 +968,10 @@ define i64 @vwreduce_add_v1i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vsext.vf2 v9, v8 -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v8, v9, a0 ; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: ret ; ; RV64-LABEL: vwreduce_add_v1i64: @@ -993,10 +993,10 @@ define i64 @vwreduce_uadd_v1i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vzext.vf2 v9, v8 -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v8, v9, a0 ; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: ret ; ; RV64-LABEL: vwreduce_uadd_v1i64: @@ -2130,10 +2130,10 @@ define i64 @vreduce_and_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_v1i64: @@ -2155,11 +2155,11 @@ define i64 @vreduce_and_v2i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: 
vle64.v v8, (a0) ; RV32-NEXT: vredand.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_v2i64: @@ -2715,10 +2715,10 @@ define i64 @vreduce_or_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_v1i64: @@ -2740,11 +2740,11 @@ define i64 @vreduce_or_v2i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredor.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_v2i64: @@ -3321,10 +3321,10 @@ define i64 @vreduce_xor_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_xor_v1i64: @@ -3918,10 +3918,10 @@ define i64 @vreduce_smin_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v1i64: @@ -3943,11 +3943,11 @@ define i64 @vreduce_smin_v2i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredmin.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v2i64: @@ -4503,10 +4503,10 @@ define i64 @vreduce_smax_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v1i64: @@ -4528,11 +4528,11 @@ define i64 @vreduce_smax_v2i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredmax.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v2i64: @@ -5088,10 
+5088,10 @@ define i64 @vreduce_umin_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_v1i64: @@ -5113,11 +5113,11 @@ define i64 @vreduce_umin_v2i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredminu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_v2i64: @@ -5673,10 +5673,10 @@ define i64 @vreduce_umax_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_v1i64: @@ -5698,11 +5698,11 @@ define i64 @vreduce_umax_v2i64(ptr %x) { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vredmaxu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_v2i64: @@ -6408,10 +6408,10 @@ define i64 @vreduce_mul_v1i64(ptr %x) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_mul_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 9862fe0593670..a772f4d466ccb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -246,8 +246,8 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -262,8 +262,8 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -512,8 +512,8 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, 
i32 zer ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -528,8 +528,8 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 @@ -854,9 +854,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v8 +; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v12, v16 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -870,9 +870,9 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v8, v16 +; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v12 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -1123,9 +1123,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v8 +; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v12, v16 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -1139,9 +1139,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v12, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v8, v16 +; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v12 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll index 
9d511070dbcbe..460cc0ade43cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -153,26 +153,26 @@ declare <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vsext.vf2 v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v24, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsext.vf2 v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -181,22 +181,23 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vsext.vf2 v16, v24 -; CHECK-NEXT: bltu a0, a1, .LBB13_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v24, v8 -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsext.vf2 v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index 6c4a26df60e79..6667a4969a75d 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -100,10 +100,10 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) { define <32 x i1> @reverse_v32i1(<32 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v32i1: ; NO-ZVBB: # %bb.0: +; NO-ZVBB-NEXT: li a0, 32 +; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; NO-ZVBB-NEXT: lui a0, %hi(.LCPI4_0) ; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI4_0) -; NO-ZVBB-NEXT: li a1, 32 -; NO-ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; NO-ZVBB-NEXT: vle8.v v8, (a0) ; NO-ZVBB-NEXT: vmv.v.i v10, 0 ; NO-ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 @@ -123,10 +123,10 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) { define <64 x i1> @reverse_v64i1(<64 x i1> %a) { ; NO-ZVBB-LABEL: reverse_v64i1: ; NO-ZVBB: # %bb.0: +; NO-ZVBB-NEXT: li a0, 64 +; NO-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; NO-ZVBB-NEXT: lui a0, %hi(.LCPI5_0) ; NO-ZVBB-NEXT: addi a0, a0, %lo(.LCPI5_0) -; NO-ZVBB-NEXT: li a1, 64 -; NO-ZVBB-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; NO-ZVBB-NEXT: vle8.v v8, (a0) ; NO-ZVBB-NEXT: vmv.v.i v12, 0 ; NO-ZVBB-NEXT: vmerge.vim v12, v12, 1, v0 @@ -146,10 +146,10 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) { define <128 x i1> @reverse_v128i1(<128 x i1> %a) { ; CHECK-LABEL: reverse_v128i1: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll index 36eaec6e4dd93..46daf2d91a1d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -308,23 +308,23 @@ declare <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index fab185bc400b7..4a0e156326bbd 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -470,40 +470,41 @@ declare <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr, i32, define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_load_v33f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a5, 32 +; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: bltu a4, a5, .LBB35_2 +; CHECK-RV32-NEXT: mv a5, a4 +; CHECK-RV32-NEXT: bltu a4, a3, .LBB35_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: li a3, 32 +; CHECK-RV32-NEXT: li a5, 32 ; CHECK-RV32-NEXT: .LBB35_2: -; CHECK-RV32-NEXT: mul a5, a3, a2 -; CHECK-RV32-NEXT: addi a6, a4, -32 -; CHECK-RV32-NEXT: sltu a4, a4, a6 -; CHECK-RV32-NEXT: addi a4, a4, -1 -; CHECK-RV32-NEXT: and a6, a4, a6 -; CHECK-RV32-NEXT: li a4, 16 -; CHECK-RV32-NEXT: add a5, a1, a5 -; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4 -; CHECK-RV32-NEXT: # %bb.3: +; CHECK-RV32-NEXT: addi a3, a5, -16 +; CHECK-RV32-NEXT: sltu a6, a5, a3 +; CHECK-RV32-NEXT: addi a7, a6, -1 ; CHECK-RV32-NEXT: li a6, 16 -; CHECK-RV32-NEXT: .LBB35_4: -; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV32-NEXT: addi a5, a3, -16 -; CHECK-RV32-NEXT: sltu a6, a3, a5 -; CHECK-RV32-NEXT: addi a6, a6, -1 -; CHECK-RV32-NEXT: and a5, a6, a5 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6 -; CHECK-RV32-NEXT: # %bb.5: +; CHECK-RV32-NEXT: and a7, a7, a3 +; CHECK-RV32-NEXT: mv a3, a5 +; CHECK-RV32-NEXT: bltu a5, a6, .LBB35_4 +; CHECK-RV32-NEXT: # %bb.3: ; CHECK-RV32-NEXT: li a3, 16 -; CHECK-RV32-NEXT: .LBB35_6: -; CHECK-RV32-NEXT: mul a4, a3, a2 -; CHECK-RV32-NEXT: add a4, a1, a4 +; CHECK-RV32-NEXT: .LBB35_4: +; CHECK-RV32-NEXT: mul t0, a3, a2 +; CHECK-RV32-NEXT: add t0, a1, t0 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v16, (t0), a2, v0.t +; CHECK-RV32-NEXT: mul a7, a5, a2 +; CHECK-RV32-NEXT: addi a5, a4, -32 +; CHECK-RV32-NEXT: sltu a4, a4, a5 +; CHECK-RV32-NEXT: addi a4, a4, -1 +; CHECK-RV32-NEXT: and a5, a4, a5 +; CHECK-RV32-NEXT: add a4, a1, a7 +; CHECK-RV32-NEXT: bltu a5, a6, .LBB35_6 +; CHECK-RV32-NEXT: # %bb.5: +; CHECK-RV32-NEXT: li a5, 16 +; CHECK-RV32-NEXT: .LBB35_6: +; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -513,48 +514,49 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: vse64.v v8, (a0) ; CHECK-RV32-NEXT: addi a1, a0, 256 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v16, (a1) +; CHECK-RV32-NEXT: vse64.v v24, (a1) ; CHECK-RV32-NEXT: addi a0, a0, 128 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v24, (a0) +; CHECK-RV32-NEXT: vse64.v v16, (a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_v33f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a5, 32 +; CHECK-RV64-NEXT: li a4, 32 ; CHECK-RV64-NEXT: vmv1r.v v8, v0 -; CHECK-RV64-NEXT: mv a4, a3 -; CHECK-RV64-NEXT: bltu a3, a5, .LBB35_2 +; 
CHECK-RV64-NEXT: mv a5, a3 +; CHECK-RV64-NEXT: bltu a3, a4, .LBB35_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: li a4, 32 +; CHECK-RV64-NEXT: li a5, 32 ; CHECK-RV64-NEXT: .LBB35_2: -; CHECK-RV64-NEXT: mul a5, a4, a2 -; CHECK-RV64-NEXT: addi a6, a3, -32 -; CHECK-RV64-NEXT: sltu a3, a3, a6 -; CHECK-RV64-NEXT: addi a3, a3, -1 -; CHECK-RV64-NEXT: and a6, a3, a6 -; CHECK-RV64-NEXT: li a3, 16 -; CHECK-RV64-NEXT: add a5, a1, a5 -; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4 -; CHECK-RV64-NEXT: # %bb.3: +; CHECK-RV64-NEXT: addi a4, a5, -16 +; CHECK-RV64-NEXT: sltu a6, a5, a4 +; CHECK-RV64-NEXT: addi a7, a6, -1 ; CHECK-RV64-NEXT: li a6, 16 -; CHECK-RV64-NEXT: .LBB35_4: -; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t -; CHECK-RV64-NEXT: addi a5, a4, -16 -; CHECK-RV64-NEXT: sltu a6, a4, a5 -; CHECK-RV64-NEXT: addi a6, a6, -1 -; CHECK-RV64-NEXT: and a5, a6, a5 -; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6 -; CHECK-RV64-NEXT: # %bb.5: +; CHECK-RV64-NEXT: and a7, a7, a4 +; CHECK-RV64-NEXT: mv a4, a5 +; CHECK-RV64-NEXT: bltu a5, a6, .LBB35_4 +; CHECK-RV64-NEXT: # %bb.3: ; CHECK-RV64-NEXT: li a4, 16 -; CHECK-RV64-NEXT: .LBB35_6: -; CHECK-RV64-NEXT: mul a3, a4, a2 -; CHECK-RV64-NEXT: add a3, a1, a3 +; CHECK-RV64-NEXT: .LBB35_4: +; CHECK-RV64-NEXT: mul t0, a4, a2 +; CHECK-RV64-NEXT: add t0, a1, t0 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma +; CHECK-RV64-NEXT: vlse64.v v16, (t0), a2, v0.t +; CHECK-RV64-NEXT: mul a7, a5, a2 +; CHECK-RV64-NEXT: addi a5, a3, -32 +; CHECK-RV64-NEXT: sltu a3, a3, a5 +; CHECK-RV64-NEXT: addi a3, a3, -1 +; CHECK-RV64-NEXT: and a5, a3, a5 +; CHECK-RV64-NEXT: add a3, a1, a7 +; CHECK-RV64-NEXT: bltu a5, a6, .LBB35_6 +; CHECK-RV64-NEXT: # %bb.5: +; CHECK-RV64-NEXT: li a5, 16 +; CHECK-RV64-NEXT: .LBB35_6: +; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma @@ -564,10 +566,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: vse64.v v8, (a0) ; CHECK-RV64-NEXT: addi a1, a0, 256 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v16, (a1) +; CHECK-RV64-NEXT: vse64.v v24, (a1) ; CHECK-RV64-NEXT: addi a0, a0, 128 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v24, (a0) +; CHECK-RV64-NEXT: vse64.v v16, (a0) ; CHECK-RV64-NEXT: ret %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl) ret <33 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index 7c1d7d056d55c..4cec22ee28854 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -53,27 +53,27 @@ declare <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16>, <128 x i1>, i32) define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_v128i7_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi 
v0, v0, 8 -; CHECK-NEXT: addi a1, a0, -64 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vslidedown.vi v12, v0, 8 +; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a1, .LBB4_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: addi a2, a0, -64 +; CHECK-NEXT: sltu a0, a0, a2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 128 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vx v16, v24, a1 -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vslideup.vx v8, v24, a1 ; CHECK-NEXT: ret %v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl) ret <128 x i7> %v @@ -227,31 +227,30 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 6 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v3, v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v2, v0, 4 +; CHECK-NEXT: vslidedown.vi v26, v0, 4 ; CHECK-NEXT: addi a2, a1, 512 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 48 ; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 @@ -278,7 +277,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -289,139 +288,164 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: .LBB16_2: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v4, v2, 2 +; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) +; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a5, 
sp, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vmv1r.v v0, v27 ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: li a6, 56 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a3, .LBB16_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a7, 64 ; CHECK-NEXT: .LBB16_4: +; CHECK-NEXT: addi a5, a1, 384 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a5, a7, -32 -; CHECK-NEXT: sltu a6, a7, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: addi a6, a5, -16 -; CHECK-NEXT: sltu t0, a5, a6 +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: li t0, 40 +; CHECK-NEXT: mul a6, a6, t0 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: addi a6, a7, -32 +; CHECK-NEXT: sltu t0, a7, a6 ; CHECK-NEXT: addi t0, t0, -1 ; CHECK-NEXT: and a6, t0, a6 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: addi t0, a6, -16 +; CHECK-NEXT: sltu t1, a6, t0 +; CHECK-NEXT: addi t1, t1, -1 +; CHECK-NEXT: and t0, t1, t0 +; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v28 +; CHECK-NEXT: addi t0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (t0) # Unknown-size Folded Reload ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: addi a6, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a5, a2, .LBB16_6 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: add t0, sp, t0 +; CHECK-NEXT: addi t0, t0, 16 +; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill +; CHECK-NEXT: bltu a6, a2, .LBB16_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a5, 16 +; CHECK-NEXT: li a6, 16 ; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: addi a6, a1, 384 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v20, v3, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a5) ; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li t0, 40 -; CHECK-NEXT: mul a5, a5, t0 +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 48 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a3, .LBB16_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: li a4, 32 ; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v3, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a6) ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: addi a1, a4, -16 +; CHECK-NEXT: sltu a5, a4, a1 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a1, a5, a1 +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: bltu a4, a2, .LBB16_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB16_10: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v2, v1, 2 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v3 ; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a4, -16 -; CHECK-NEXT: sltu a4, a4, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: bltu a7, a3, .LBB16_12 ; CHECK-NEXT: # %bb.11: ; CHECK-NEXT: li a7, 32 ; CHECK-NEXT: .LBB16_12: ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v24, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: li a4, 56 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: li a4, 56 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: vmv4r.v v24, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: li a4, 48 ; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v24, 16 +; CHECK-NEXT: vslideup.vi v8, v16, 16 
; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -432,7 +456,8 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload @@ -444,25 +469,25 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 16 -; CHECK-NEXT: vse32.v v24, (a0) +; CHECK-NEXT: vslideup.vi v16, v8, 16 +; CHECK-NEXT: vse32.v v16, (a0) ; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 48 ; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 @@ -470,15 +495,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: addi a0, a0, 384 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 48 +; CHECK-NEXT: li a2, 56 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 6 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -491,27 +515,27 @@ declare <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_v32i32_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v0 +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB17_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v12, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; 
CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: vslideup.vi v8, v24, 16 ; CHECK-NEXT: ret %v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl) ret <32 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll index b2d248623d93f..66b2c41d1e090 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -308,23 +308,23 @@ declare <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB25_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index d0b2cabb8a649..c6654c8e592ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1528,47 +1528,47 @@ declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB108_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, 
a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: li a1, 16 -; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: bltu a0, a1, .LBB108_2 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB108_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB108_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t +; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer @@ -1649,17 +1649,16 @@ define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-LABEL: vadd_vx_v32i64_evl27: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64_evl27: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index 80629c8cdd8d0..2a468d434903a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -324,46 +324,37 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, sp, 
16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index ee6630589c65b..592a72180262f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -321,23 +321,23 @@ declare <32 x double> @llvm.vp.fabs.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfabs.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfabs.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfabs.v v8, v8, v0.t +; CHECK-NEXT: vfabs.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll index dc95af50ac647..8243e86c74470 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll @@ -3227,10 +3227,10 @@ define <32 x i1> @fcmps_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vv v16, v8, v8 +; CHECK-NEXT: vmfle.vf v8, v12, fa0 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v16 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index ba512678791ef..951a5dda286e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -657,93 +657,75 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v24, (a2) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: bltu a4, a1, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: 
vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a0, .LBB50_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll index 0163073a0f914..e4ca15ce6f646 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -324,46 +324,37 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll index 592a56b7af0c9..366acb32f5cfe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -324,46 +324,37 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
+; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t -; CHECK-NEXT: bltu a2, a0, .LBB26_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index d1a3d6d5b1899..0d01f414e510d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -657,93 +657,75 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v1, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v24, (a2) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; 
CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: bltu a4, a1, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a0, .LBB50_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index c98abc5beecf8..71edf6721b681 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -321,23 +321,23 @@ declare <32 x double> @llvm.vp.fneg.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 
-; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfneg.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfneg.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfneg.v v8, v8, v0.t +; CHECK-NEXT: vfneg.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll index 385d791a9d89c..de8386fc49b0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -321,23 +321,23 @@ declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfsqrt.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB26_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfsqrt.v v8, v8, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfsqrt.v v8, v8, v0.t +; CHECK-NEXT: vfsqrt.v v16, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 474125b11699a..db6178f6773dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -1091,48 +1091,48 @@ declare <32 x i64> @llvm.vp.smax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmax.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, 
.LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vmax.vv v8, v8, v24, v0.t +; RV32-NEXT: vmax.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmax_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vmax.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vmax.vx v8, v8, a1, v0.t +; RV64-NEXT: vmax.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 0bf408cc292c8..00b69476777b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -1090,48 +1090,48 @@ declare <32 x i64> @llvm.vp.umax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t +; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmaxu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: 
addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vmaxu.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t +; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index fddc981ad3c56..225432524d7fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -1091,48 +1091,48 @@ declare <32 x i64> @llvm.vp.smin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmin.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vmin.vv v8, v8, v24, v0.t +; RV32-NEXT: vmin.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vmin_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vmin.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vmin.vx v8, v8, a1, v0.t +; RV64-NEXT: vmin.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = 
insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 83b70b95d2b07..78fc04c5cf8fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -1090,48 +1090,48 @@ declare <32 x i64> @llvm.vp.umin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vminu_vx_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vslidedown.vi v1, v0, 2 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vminu.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB74_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vminu.vv v8, v8, v24, v0.t +; RV32-NEXT: vminu.vv v16, v16, v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vminu_vx_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a2, a2, a1 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vminu.vx v16, v16, a1, v0.t +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vminu.vx v8, v8, a1, v0.t +; RV64-NEXT: vminu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 74426c83c3d19..4451bce44a4b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -285,32 +285,32 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; ; RV64-LABEL: vpgather_baseidx_v32i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB13_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB13_2: +; RV64-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma -; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB13_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB13_2: -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 ; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-NEXT: vslideup.vi v8, v12, 16 +; RV64-NEXT: vslideup.vi v10, v8, 16 +; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -1890,47 +1890,45 @@ declare <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr>, <32 x i1>, i32) define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB86_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB86_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t ; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a2, a0, a1 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v8, 16 +; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (zero), v24, v0.t -; RV32-NEXT: li a1, 16 -; RV32-NEXT: bltu a0, a1, .LBB86_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB86_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t -; RV32-NEXT: vmv.v.v v8, v24 +; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: vmv8r.v v8, v24 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB86_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB86_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t ; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a2, a0, a1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: and a1, a2, a1 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; 
RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t -; RV64-NEXT: li a1, 16 -; RV64-NEXT: bltu a0, a1, .LBB86_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB86_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t +; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: ret %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) ret <32 x double> %v @@ -1951,12 +1949,12 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV32-NEXT: .LBB87_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -1965,31 +1963,29 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v10, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB87_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB87_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB87_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB87_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2011,12 +2007,12 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: .LBB88_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2025,30 +2021,30 @@ 
define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v12 ; RV64-NEXT: vsext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB88_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB88_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB88_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB88_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2071,12 +2067,12 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: .LBB89_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2085,30 +2081,30 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v10, v0 -; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf8 v16, v12 ; RV64-NEXT: vzext.vf8 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vzext.vf8 v16, v8 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB89_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB89_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: 
vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB89_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB89_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2131,12 +2127,12 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32-NEXT: .LBB90_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2145,31 +2141,29 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB90_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB90_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB90_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB90_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2191,12 +2185,12 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: .LBB91_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2205,30 +2199,30 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v0, v16 ; RV64-NEXT: vsext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB91_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB91_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB91_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2251,12 +2245,12 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: .LBB92_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2265,30 +2259,30 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vzext.vf4 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf4 v0, v16 ; RV64-NEXT: vzext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v24, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v12, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB92_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; 
RV64-NEXT: bltu a1, a3, .LBB92_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB92_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2310,12 +2304,12 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-NEXT: .LBB93_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2324,45 +2318,29 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: vmv1r.v v24, v0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 16 +; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsext.vf2 v24, v16 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB93_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB93_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB93_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB93_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vluxei64.v 
v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2383,12 +2361,12 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: .LBB94_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2397,30 +2375,30 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v0, v16 ; RV64-NEXT: vsext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB94_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB94_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB94_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = sext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2442,12 +2420,12 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: .LBB95_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -2456,30 +2434,30 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: 
vzext.vf2 v24, v8 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v8, 16 +; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v0, v16 ; RV64-NEXT: vzext.vf2 v16, v8 -; RV64-NEXT: vsll.vi v8, v16, 3 -; RV64-NEXT: vsll.vi v16, v0, 3 -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v24, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB95_2 +; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB95_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB95_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %eidxs = zext <32 x i32> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2519,26 +2497,25 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; ; RV64-LABEL: vpgather_baseidx_v32f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: li a3, 16 +; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a3, .LBB96_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: .LBB96_2: +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a3, a1, a2 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: and a2, a3, a2 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: li a2, 16 -; RV64-NEXT: bltu a1, a2, .LBB96_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB96_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 94e3245e0a184..e36d1286efb0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -381,24 +381,23 @@ declare <32 x double> @llvm.vp.load.v32f64.p0(ptr, <32 x i1>, i32) define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: mv a2, a1 +; 
CHECK-NEXT: bltu a1, a3, .LBB31_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: addi a2, a1, -16 -; CHECK-NEXT: sltu a3, a1, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a3, a0, 128 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a3), v0.t -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: bltu a1, a2, .LBB31_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB31_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <32 x double> @llvm.vp.load.v32f64.p0(ptr %ptr, <32 x i1> %m, i32 %evl) ret <32 x double> %load @@ -422,9 +421,9 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: sltu a5, a3, a4 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a5), v0.t ; CHECK-NEXT: addi a4, a2, -32 @@ -436,9 +435,9 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a3, a2, .LBB32_6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index a5b513455bd51..e7ad0127b62ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1065,41 +1065,41 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3 ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: bltu a2, 
a0, .LBB79_2 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB79_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB79_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 @@ -1112,23 +1112,22 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3 define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vf_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB80_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 -; CHECK-NEXT: bltu a0, a1, .LBB80_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <32 x double> poison, double %a, i32 0 %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index eafb136bf05a7..afd9fe4b1bbdd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1701,12 +1701,12 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV32-NEXT: .LBB79_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a0, a1, -16 ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: 
vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1769,12 +1769,12 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV32-NEXT: .LBB80_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -1854,12 +1854,12 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: .LBB81_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -1941,12 +1941,12 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: .LBB82_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index 7570eb3d4293c..04b6cbc378c9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -297,9 +297,9 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll index a746b550940ce..0855d4ca5906a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll @@ -1832,18 +1832,19 @@ define <2 x i64> @vror_vx_v2i64(<2 x i64> %a, i64 %b) { define <2 x i64> @vror_vi_v2i64(<2 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v2i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v9, a0 +; CHECK-RV32-NEXT: vand.vi v9, v9, 1 +; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v9, 0 -; 
CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v9, v9, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v8, v9 +; CHECK-RV32-NEXT: vsub.vx v10, v10, a1 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v9, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v2i64: @@ -1867,18 +1868,19 @@ define <2 x i64> @vror_vi_v2i64(<2 x i64> %a) { define <2 x i64> @vror_vi_rotl_v2i64(<2 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v2i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v9, a0 +; CHECK-RV32-NEXT: vand.vi v9, v9, 1 +; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 ; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v9, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v10, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v9, v9, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v8, v9 +; CHECK-RV32-NEXT: vsub.vx v10, v10, a1 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v9, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v2i64: @@ -2002,18 +2004,19 @@ define <4 x i64> @vror_vx_v4i64(<4 x i64> %a, i64 %b) { define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v4i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v12, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: vsub.vx v12, v12, a1 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v4i64: @@ -2037,18 +2040,19 @@ define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) { define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v4i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 ; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v12, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli 
zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: vsub.vx v12, v12, a1 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v10, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v4i64: @@ -2172,18 +2176,19 @@ define <8 x i64> @vror_vx_v8i64(<8 x i64> %a, i64 %b) { define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v16, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV32-NEXT: vmv.v.x v16, a0 -; CHECK-RV32-NEXT: vand.vi v16, v16, 1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v8, v12 +; CHECK-RV32-NEXT: vsub.vx v16, v16, a1 +; CHECK-RV32-NEXT: vand.vx v16, v16, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v8i64: @@ -2207,18 +2212,19 @@ define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) { define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 ; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: li a0, 1 +; CHECK-RV32-NEXT: vmv.v.i v16, 0 +; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV32-NEXT: vmv.v.x v16, a0 -; CHECK-RV32-NEXT: vand.vi v16, v16, 1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v8, v12 +; CHECK-RV32-NEXT: vsub.vx v16, v16, a1 +; CHECK-RV32-NEXT: vand.vx v16, v16, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v12, v8 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index fd0a886eab745..96e926f29ded9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -403,46 +403,35 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * 
vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: bltu a2, a0, .LBB25_2 +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB25_2: -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -453,42 +442,15 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: 
vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll index 191af6076b5d2..61b841936ac1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -153,26 +153,26 @@ declare <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vzext.vf2 v16, v24, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB12_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 v24, v8, v0.t -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vzext.vf2 v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -181,22 +181,23 @@ define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, -16 -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v8, 16 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: vzext.vf2 v16, v24 -; CHECK-NEXT: bltu a0, a1, .LBB13_2 +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vzext.vf2 
v24, v8 -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vzext.vf2 v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index b64c24456caf3..3e7565a572b92 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -456,9 +456,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -478,22 +478,36 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 @@ -503,7 +517,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -614,9 +628,9 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 
; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -636,22 +650,36 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -660,7 +688,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -781,9 +809,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -803,22 +831,36 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; 
CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -828,7 +870,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2209,59 +2251,58 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB18_3 +; CHECK-V-NEXT: beqz a1, .LBB18_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB18_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: bnez s1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: -; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: beqz a4, .LBB18_5 +; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: beqz a5, .LBB18_5 ; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB18_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: bnez a4, .LBB18_6 +; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: bnez a5, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB18_6: # %entry -; CHECK-V-NEXT: and a6, a6, s1 -; CHECK-V-NEXT: neg a4, a5 -; CHECK-V-NEXT: bnez a5, .LBB18_8 +; CHECK-V-NEXT: neg a6, a5 +; CHECK-V-NEXT: neg a5, a4 +; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: bnez a4, .LBB18_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: and a4, a4, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 -; CHECK-V-NEXT: beq a6, a2, .LBB18_11 +; CHECK-V-NEXT: beq a5, a2, .LBB18_11 ; CHECK-V-NEXT: # %bb.9: # %entry -; CHECK-V-NEXT: slti a3, a6, 0 +; CHECK-V-NEXT: slti a3, a5, 0 ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB18_12 ; CHECK-V-NEXT: .LBB18_10: -; CHECK-V-NEXT: sltu a2, a1, a0 -; CHECK-V-NEXT: beqz a3, .LBB18_13 +; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: beqz a2, .LBB18_13 ; CHECK-V-NEXT: j .LBB18_14 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB18_10 ; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 -; 
CHECK-V-NEXT: bnez a3, .LBB18_14 +; CHECK-V-NEXT: bnez a2, .LBB18_14 ; CHECK-V-NEXT: .LBB18_13: # %entry ; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB18_14: # %entry -; CHECK-V-NEXT: bnez a2, .LBB18_16 +; CHECK-V-NEXT: bnez a3, .LBB18_16 ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2341,15 +2382,15 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt -; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2446,41 +2487,41 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv a2, a1 -; CHECK-V-NEXT: blez a1, .LBB20_2 +; CHECK-V-NEXT: mv a2, s1 +; CHECK-V-NEXT: blez s1, .LBB20_2 ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: slti a1, a1, 1 -; CHECK-V-NEXT: blez s0, .LBB20_4 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: blez a1, .LBB20_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 +; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry ; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: neg a1, a1 -; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: beqz a2, .LBB20_7 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: beqz a1, .LBB20_7 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a1, a2 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: bnez s0, .LBB20_8 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: bnez a2, .LBB20_8 ; CHECK-V-NEXT: .LBB20_6: ; CHECK-V-NEXT: snez a2, a3 ; CHECK-V-NEXT: j .LBB20_9 ; CHECK-V-NEXT: .LBB20_7: ; CHECK-V-NEXT: snez a1, a0 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: beqz s0, .LBB20_6 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: beqz a2, .LBB20_6 ; CHECK-V-NEXT: .LBB20_8: # %entry -; CHECK-V-NEXT: sgtz a2, s0 +; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: .LBB20_9: # %entry ; CHECK-V-NEXT: neg a2, a2 ; CHECK-V-NEXT: and a2, a2, a3 @@ -2617,59 +2658,58 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB21_3 +; CHECK-V-NEXT: beqz a1, .LBB21_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB21_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; 
CHECK-V-NEXT: bnez s1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: -; CHECK-V-NEXT: sltu a5, a0, a3 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: beqz a4, .LBB21_5 +; CHECK-V-NEXT: sltu a5, s0, a3 +; CHECK-V-NEXT: beqz a5, .LBB21_5 ; CHECK-V-NEXT: j .LBB21_6 ; CHECK-V-NEXT: .LBB21_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB21_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 -; CHECK-V-NEXT: neg a6, a4 -; CHECK-V-NEXT: bnez a4, .LBB21_6 +; CHECK-V-NEXT: slti a5, s1, 0 +; CHECK-V-NEXT: bnez a5, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry ; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB21_6: # %entry -; CHECK-V-NEXT: and a6, a6, s1 -; CHECK-V-NEXT: neg a4, a5 -; CHECK-V-NEXT: bnez a5, .LBB21_8 +; CHECK-V-NEXT: neg a6, a5 +; CHECK-V-NEXT: neg a5, a4 +; CHECK-V-NEXT: and a5, a5, a1 +; CHECK-V-NEXT: bnez a4, .LBB21_8 ; CHECK-V-NEXT: # %bb.7: # %entry ; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB21_8: # %entry -; CHECK-V-NEXT: and a4, a4, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 -; CHECK-V-NEXT: beq a6, a2, .LBB21_11 +; CHECK-V-NEXT: beq a5, a2, .LBB21_11 ; CHECK-V-NEXT: # %bb.9: # %entry -; CHECK-V-NEXT: slti a3, a6, 0 +; CHECK-V-NEXT: slti a3, a5, 0 ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB21_12 ; CHECK-V-NEXT: .LBB21_10: -; CHECK-V-NEXT: sltu a2, a1, a0 -; CHECK-V-NEXT: beqz a3, .LBB21_13 +; CHECK-V-NEXT: sltu a2, a1, s0 +; CHECK-V-NEXT: beqz a2, .LBB21_13 ; CHECK-V-NEXT: j .LBB21_14 ; CHECK-V-NEXT: .LBB21_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB21_10 ; CHECK-V-NEXT: .LBB21_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 -; CHECK-V-NEXT: bnez a3, .LBB21_14 +; CHECK-V-NEXT: bnez a2, .LBB21_14 ; CHECK-V-NEXT: .LBB21_13: # %entry ; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB21_14: # %entry -; CHECK-V-NEXT: bnez a2, .LBB21_16 +; CHECK-V-NEXT: bnez a3, .LBB21_16 ; CHECK-V-NEXT: # %bb.15: # %entry ; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB21_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2749,15 +2789,15 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: snez a1, a1 +; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2854,41 +2894,41 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv a2, a1 
-; CHECK-V-NEXT: blez a1, .LBB23_2 +; CHECK-V-NEXT: mv a2, s1 +; CHECK-V-NEXT: blez s1, .LBB23_2 ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: slti a1, a1, 1 -; CHECK-V-NEXT: blez s0, .LBB23_4 +; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 +; CHECK-V-NEXT: blez a1, .LBB23_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 +; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry ; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: neg a1, a1 -; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: beqz a2, .LBB23_7 +; CHECK-V-NEXT: neg a4, a4 +; CHECK-V-NEXT: and a0, a4, a0 +; CHECK-V-NEXT: beqz a1, .LBB23_7 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: sgtz a1, a2 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: bnez s0, .LBB23_8 +; CHECK-V-NEXT: sgtz a1, a1 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: bnez a2, .LBB23_8 ; CHECK-V-NEXT: .LBB23_6: ; CHECK-V-NEXT: snez a2, a3 ; CHECK-V-NEXT: j .LBB23_9 ; CHECK-V-NEXT: .LBB23_7: ; CHECK-V-NEXT: snez a1, a0 -; CHECK-V-NEXT: and a3, a3, s1 -; CHECK-V-NEXT: beqz s0, .LBB23_6 +; CHECK-V-NEXT: and a3, a3, s0 +; CHECK-V-NEXT: beqz a2, .LBB23_6 ; CHECK-V-NEXT: .LBB23_8: # %entry -; CHECK-V-NEXT: sgtz a2, s0 +; CHECK-V-NEXT: sgtz a2, a2 ; CHECK-V-NEXT: .LBB23_9: # %entry ; CHECK-V-NEXT: neg a2, a2 ; CHECK-V-NEXT: and a2, a2, a3 @@ -3752,9 +3792,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -3774,22 +3814,36 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; 
CHECK-V-NEXT: lui a0, 524288 @@ -3799,7 +3853,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3908,9 +3962,9 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -3930,22 +3984,36 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -3954,7 +4022,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -4074,9 +4142,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu 
s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -4096,22 +4164,36 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -4121,7 +4203,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -5486,62 +5568,61 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB45_2 +; CHECK-V-NEXT: beqz a1, .LBB45_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: slti a4, a1, 0 ; CHECK-V-NEXT: beqz a4, .LBB45_3 ; CHECK-V-NEXT: j .LBB45_4 ; CHECK-V-NEXT: .LBB45_2: -; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: sltu a4, a0, a3 ; CHECK-V-NEXT: bnez a4, .LBB45_4 ; CHECK-V-NEXT: .LBB45_3: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB45_4: # %entry -; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: beqz a1, .LBB45_6 +; CHECK-V-NEXT: beqz s1, .LBB45_6 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: beqz a4, .LBB45_7 -; CHECK-V-NEXT: j .LBB45_8 +; CHECK-V-NEXT: slti a6, s1, 0 +; CHECK-V-NEXT: j .LBB45_7 ; CHECK-V-NEXT: .LBB45_6: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: bnez a4, .LBB45_8 +; CHECK-V-NEXT: sltu a6, s0, a3 ; CHECK-V-NEXT: .LBB45_7: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB45_8: # %entry +; CHECK-V-NEXT: neg a5, a6 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a6, .LBB45_9 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB45_9: # %entry ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq a5, a2, .LBB45_10 -; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: 
beq a5, a2, .LBB45_11 +; CHECK-V-NEXT: # %bb.10: # %entry ; CHECK-V-NEXT: slti a5, a5, 0 ; CHECK-V-NEXT: xori a5, a5, 1 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: beqz a5, .LBB45_11 -; CHECK-V-NEXT: j .LBB45_12 -; CHECK-V-NEXT: .LBB45_10: +; CHECK-V-NEXT: beqz a5, .LBB45_12 +; CHECK-V-NEXT: j .LBB45_13 +; CHECK-V-NEXT: .LBB45_11: ; CHECK-V-NEXT: sltu a5, a3, s0 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: bnez a5, .LBB45_12 -; CHECK-V-NEXT: .LBB45_11: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: bnez a5, .LBB45_13 ; CHECK-V-NEXT: .LBB45_12: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB45_14 -; CHECK-V-NEXT: # %bb.13: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB45_13: # %entry +; CHECK-V-NEXT: beq a1, a2, .LBB45_15 +; CHECK-V-NEXT: # %bb.14: # %entry ; CHECK-V-NEXT: slti a1, a1, 0 ; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: beqz a1, .LBB45_15 -; CHECK-V-NEXT: j .LBB45_16 -; CHECK-V-NEXT: .LBB45_14: +; CHECK-V-NEXT: beqz a1, .LBB45_16 +; CHECK-V-NEXT: j .LBB45_17 +; CHECK-V-NEXT: .LBB45_15: ; CHECK-V-NEXT: sltu a1, a3, a0 -; CHECK-V-NEXT: bnez a1, .LBB45_16 -; CHECK-V-NEXT: .LBB45_15: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: bnez a1, .LBB45_17 ; CHECK-V-NEXT: .LBB45_16: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB45_17: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5608,26 +5689,26 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti@plt -; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a1, s1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v8, a1 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5712,8 +5793,8 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixdfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload @@ -5724,19 +5805,20 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: 
# %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB47_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB47_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: .LBB47_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: blez s0, .LBB47_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 -; CHECK-V-NEXT: .LBB47_4: # %entry -; CHECK-V-NEXT: slti a1, s0, 0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: slti a1, s1, 1 +; CHECK-V-NEXT: neg a1, a1 +; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 @@ -5872,62 +5954,61 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti@plt ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB48_2 +; CHECK-V-NEXT: beqz a1, .LBB48_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 +; CHECK-V-NEXT: slti a4, a1, 0 ; CHECK-V-NEXT: beqz a4, .LBB48_3 ; CHECK-V-NEXT: j .LBB48_4 ; CHECK-V-NEXT: .LBB48_2: -; CHECK-V-NEXT: sltu a4, s0, a3 +; CHECK-V-NEXT: sltu a4, a0, a3 ; CHECK-V-NEXT: bnez a4, .LBB48_4 ; CHECK-V-NEXT: .LBB48_3: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB48_4: # %entry -; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: beqz a1, .LBB48_6 +; CHECK-V-NEXT: beqz s1, .LBB48_6 ; CHECK-V-NEXT: # %bb.5: # %entry -; CHECK-V-NEXT: slti a4, a1, 0 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: beqz a4, .LBB48_7 -; CHECK-V-NEXT: j .LBB48_8 +; CHECK-V-NEXT: slti a6, s1, 0 +; CHECK-V-NEXT: j .LBB48_7 ; CHECK-V-NEXT: .LBB48_6: -; CHECK-V-NEXT: sltu a4, a0, a3 -; CHECK-V-NEXT: and a5, a5, s1 -; CHECK-V-NEXT: bnez a4, .LBB48_8 +; CHECK-V-NEXT: sltu a6, s0, a3 ; CHECK-V-NEXT: .LBB48_7: # %entry -; CHECK-V-NEXT: mv a0, a3 -; CHECK-V-NEXT: .LBB48_8: # %entry +; CHECK-V-NEXT: neg a5, a6 +; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: bnez a6, .LBB48_9 +; CHECK-V-NEXT: # %bb.8: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB48_9: # %entry ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: slli a3, a2, 63 -; CHECK-V-NEXT: beq a5, a2, .LBB48_10 -; CHECK-V-NEXT: # %bb.9: # %entry +; CHECK-V-NEXT: beq a5, a2, .LBB48_11 +; CHECK-V-NEXT: # %bb.10: # %entry ; CHECK-V-NEXT: slti a5, a5, 0 ; CHECK-V-NEXT: xori a5, a5, 1 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: beqz a5, .LBB48_11 -; CHECK-V-NEXT: j .LBB48_12 -; CHECK-V-NEXT: .LBB48_10: +; CHECK-V-NEXT: beqz a5, .LBB48_12 +; CHECK-V-NEXT: j .LBB48_13 +; CHECK-V-NEXT: .LBB48_11: ; CHECK-V-NEXT: sltu a5, a3, s0 ; CHECK-V-NEXT: and a1, a4, a1 -; CHECK-V-NEXT: bnez a5, .LBB48_12 -; CHECK-V-NEXT: .LBB48_11: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: bnez a5, .LBB48_13 ; CHECK-V-NEXT: .LBB48_12: # %entry -; CHECK-V-NEXT: beq a1, a2, .LBB48_14 -; CHECK-V-NEXT: # %bb.13: # %entry +; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: .LBB48_13: # %entry +; CHECK-V-NEXT: beq a1, a2, .LBB48_15 +; CHECK-V-NEXT: # %bb.14: # %entry ; CHECK-V-NEXT: slti a1, a1, 0 ; CHECK-V-NEXT: xori a1, a1, 1 -; CHECK-V-NEXT: beqz a1, .LBB48_15 -; CHECK-V-NEXT: j .LBB48_16 -; CHECK-V-NEXT: .LBB48_14: +; CHECK-V-NEXT: beqz a1, .LBB48_16 +; CHECK-V-NEXT: j .LBB48_17 +; CHECK-V-NEXT: .LBB48_15: ; CHECK-V-NEXT: sltu a1, 
a3, a0 -; CHECK-V-NEXT: bnez a1, .LBB48_16 -; CHECK-V-NEXT: .LBB48_15: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: bnez a1, .LBB48_17 ; CHECK-V-NEXT: .LBB48_16: # %entry +; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: .LBB48_17: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -5994,26 +6075,26 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-V-NEXT: vfmv.f.s fa0, v9 +; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti@plt -; CHECK-V-NEXT: snez a2, s1 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a2, a2, s0 ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a1, s1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a1, a1, s0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v8, a1 +; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -6098,8 +6179,8 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixsfti@plt -; CHECK-V-NEXT: mv s1, a0 -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv s0, a0 +; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 0, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload @@ -6110,19 +6191,20 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB50_2: # %entry -; CHECK-V-NEXT: slti a3, s0, 1 -; CHECK-V-NEXT: neg a3, a3 -; CHECK-V-NEXT: and a3, a3, s1 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB50_4 +; CHECK-V-NEXT: # %bb.3: # %entry +; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: .LBB50_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a0, a1, a0 -; CHECK-V-NEXT: blez s0, .LBB50_4 -; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li s0, 1 -; CHECK-V-NEXT: .LBB50_4: # %entry -; CHECK-V-NEXT: slti a1, s0, 0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a1, a1, a3 +; CHECK-V-NEXT: slti a1, s1, 1 +; CHECK-V-NEXT: neg a1, a1 +; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll index 
e92357fc31659..8f36aad817274 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -53,11 +53,10 @@ define @test_signed_v8f32_v8i32( %f) { define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f32.nxv4i16( %f) @@ -67,11 +66,10 @@ define @test_signed_v4f32_v4i16( %f) { define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f32.nxv8i16( %f) @@ -82,8 +80,8 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -95,8 +93,8 @@ define @test_signed_v4f32_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -117,11 +115,10 @@ declare @llvm.fptosi.sat.nxv4f64.nxv4i64( @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i32( %f) @@ -131,11 +128,10 @@ define @test_signed_v2f64_v2i32( %f) { define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i32( %f) @@ -145,11 +141,10 @@ define @test_signed_v4f64_v4i32( %f) { define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; 
CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i32( %f) @@ -241,8 +236,8 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: ret @@ -254,8 +249,8 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -267,8 +262,8 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -304,11 +299,10 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -320,11 +314,10 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll index 7d10bb3e4270a..242034f9826cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptoui-sat.ll @@ -53,11 +53,10 @@ define @test_signed_v8f32_v8i32( %f) { define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) @@ -67,11 +66,10 @@ define @test_signed_v4f32_v4i16( %f) { define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: 
vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) @@ -82,8 +80,8 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -95,8 +93,8 @@ define @test_signed_v4f32_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -117,11 +115,10 @@ declare @llvm.fptoui.sat.nxv4f64.nxv4i64( @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) @@ -131,11 +128,10 @@ define @test_signed_v2f64_v2i32( %f) { define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) @@ -145,11 +141,10 @@ define @test_signed_v4f64_v4i32( %f) { define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) @@ -259,8 +254,8 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmerge.vim v8, v9, 0, v0 ; CHECK-NEXT: ret @@ -272,8 +267,8 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ 
-285,8 +280,8 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret @@ -322,11 +317,10 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret @@ -338,11 +332,10 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vmerge.vim v8, v12, 0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index 0b705eda2d913..0eb69c89f2c44 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -966,8 +966,7 @@ define @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @vpload_nxv8i64(* %ptr, @llvm.vp.load.nxv8i64.p0(* %ptr, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index 950c55ba1cf17..57de8341cb89c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -438,14 +438,14 @@ define @insert_nxv4i1_nxv1i1_2( %v, @insertelt_nxv1f16_imm( %v, half % define @insertelt_nxv1f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -62,9 +62,9 @@ define @insertelt_nxv2f16_imm( %v, half % define @insertelt_nxv2f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -96,9 +96,9 @@ define @insertelt_nxv4f16_imm( %v, half % define @insertelt_nxv4f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; 
CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -232,9 +232,9 @@ define @insertelt_nxv1f32_imm( %v, floa define @insertelt_nxv1f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -266,9 +266,9 @@ define @insertelt_nxv2f32_imm( %v, floa define @insertelt_nxv2f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret @@ -402,9 +402,9 @@ define @insertelt_nxv1f64_imm( %v, do define @insertelt_nxv1f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: addi a1, a0, 1 +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll index 3ee8641385bcd..a7bd15f2a7b33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -25,9 +25,9 @@ define @insertelt_idx_nxv1i1( %x, i1 %elt, i6 ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -60,9 +60,9 @@ define @insertelt_idx_nxv2i1( %x, i1 %elt, i6 ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -95,9 +95,9 @@ define @insertelt_idx_nxv4i1( %x, i1 %elt, i6 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -130,9 +130,9 @@ define @insertelt_idx_nxv8i1( %x, i1 %elt, i6 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: addi a2, a1, 1 ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; 
CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll index 327329eac5b88..79de1574b9e7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -26,10 +26,10 @@ define @insertelt_nxv1i8_imm( %v, i8 signext define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -60,10 +60,10 @@ define @insertelt_nxv2i8_imm( %v, i8 signext define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv2i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -94,10 +94,10 @@ define @insertelt_nxv4i8_imm( %v, i8 signext define @insertelt_nxv4i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -128,10 +128,10 @@ define @insertelt_nxv8i8_imm( %v, i8 signext define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -264,10 +264,10 @@ define @insertelt_nxv1i16_imm( %v, i16 sign define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -298,10 +298,10 @@ define @insertelt_nxv2i16_imm( %v, i16 sign define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; 
CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -332,10 +332,10 @@ define @insertelt_nxv4i16_imm( %v, i16 sign define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -468,10 +468,10 @@ define @insertelt_nxv1i32_imm( %v, i32 %elt define @insertelt_nxv1i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -502,10 +502,10 @@ define @insertelt_nxv2i32_imm( %v, i32 %elt define @insertelt_nxv2i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 661bd56615301..1dd00197bbbb0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -26,10 +26,10 @@ define @insertelt_nxv1i8_imm( %v, i8 signext define @insertelt_nxv1i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -60,10 +60,10 @@ define @insertelt_nxv2i8_imm( %v, i8 signext define @insertelt_nxv2i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -94,10 +94,10 @@ define @insertelt_nxv4i8_imm( %v, i8 signext define @insertelt_nxv4i8_idx( %v, i8 
signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -128,10 +128,10 @@ define @insertelt_nxv8i8_imm( %v, i8 signext define @insertelt_nxv8i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 %idx @@ -264,10 +264,10 @@ define @insertelt_nxv1i16_imm( %v, i16 sign define @insertelt_nxv1i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -298,10 +298,10 @@ define @insertelt_nxv2i16_imm( %v, i16 sign define @insertelt_nxv2i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -332,10 +332,10 @@ define @insertelt_nxv4i16_imm( %v, i16 sign define @insertelt_nxv4i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 %idx @@ -468,10 +468,10 @@ define @insertelt_nxv1i32_imm( %v, i32 sign define @insertelt_nxv1i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv1i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx @@ -502,10 +502,10 @@ define @insertelt_nxv2i32_imm( %v, i32 sign define @insertelt_nxv2i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2i32_idx: ; CHECK: # %bb.0: 
-; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a2, a1, 1 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: addi a0, a1, 1 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll index 4730c2755acdb..accc18519d626 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -169,19 +169,19 @@ define void @memset_16(ptr %a, i8 %value) nounwind { define void @memset_32(ptr %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: memset_32: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 16 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 16 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: vse8.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: memset_32: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 16 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 16 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: vse8.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0) @@ -191,10 +191,10 @@ define void @memset_32(ptr %a, i8 %value) nounwind { define void @memset_64(ptr %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: memset_64: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 48 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 48 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: addi a1, a0, 32 ; RV32-BOTH-NEXT: vse8.v v8, (a1) ; RV32-BOTH-NEXT: addi a1, a0, 16 @@ -204,10 +204,10 @@ define void @memset_64(ptr %a, i8 %value) nounwind { ; ; RV64-BOTH-LABEL: memset_64: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 48 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 48 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: addi a1, a0, 32 ; RV64-BOTH-NEXT: vse8.v v8, (a1) ; RV64-BOTH-NEXT: addi a1, a0, 16 @@ -309,19 +309,19 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind { define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind { ; RV32-BOTH-LABEL: aligned_memset_32: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 16 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 16 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: vse8.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memset_32: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 16 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 16 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: vse8.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0) @@ -331,10 +331,10 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind { define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind 
{ ; RV32-BOTH-LABEL: aligned_memset_64: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: addi a2, a0, 48 ; RV32-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.x v8, a1 -; RV32-BOTH-NEXT: addi a1, a0, 48 -; RV32-BOTH-NEXT: vse8.v v8, (a1) +; RV32-BOTH-NEXT: vse8.v v8, (a2) ; RV32-BOTH-NEXT: addi a1, a0, 32 ; RV32-BOTH-NEXT: vse8.v v8, (a1) ; RV32-BOTH-NEXT: addi a1, a0, 16 @@ -344,10 +344,10 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind { ; ; RV64-BOTH-LABEL: aligned_memset_64: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: addi a2, a0, 48 ; RV64-BOTH-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.x v8, a1 -; RV64-BOTH-NEXT: addi a1, a0, 48 -; RV64-BOTH-NEXT: vse8.v v8, (a1) +; RV64-BOTH-NEXT: vse8.v v8, (a2) ; RV64-BOTH-NEXT: addi a1, a0, 32 ; RV64-BOTH-NEXT: vse8.v v8, (a1) ; RV64-BOTH-NEXT: addi a1, a0, 16 @@ -504,37 +504,37 @@ define void @bzero_16(ptr %a) nounwind { define void @bzero_32(ptr %a) nounwind { ; RV32-LABEL: bzero_32: ; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 16 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vse8.v v8, (a1) +; RV32-NEXT: vse8.v v8, (a0) +; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: bzero_32: ; RV64: # %bb.0: -; RV64-NEXT: addi a1, a0, 16 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vse8.v v8, (a1) +; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: addi a0, a0, 16 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: bzero_32: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: addi a1, a0, 16 ; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-FAST-NEXT: vmv.v.i v8, 0 -; RV32-FAST-NEXT: vse64.v v8, (a1) +; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: addi a0, a0, 16 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_32: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: addi a1, a0, 16 ; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-FAST-NEXT: vmv.v.i v8, 0 -; RV64-FAST-NEXT: vse64.v v8, (a1) +; RV64-FAST-NEXT: vse64.v v8, (a0) +; RV64-FAST-NEXT: addi a0, a0, 16 ; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0) @@ -642,19 +642,19 @@ define void @aligned_bzero_16(ptr %a) nounwind { define void @aligned_bzero_32(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_32: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: addi a1, a0, 16 ; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 -; RV32-BOTH-NEXT: vse64.v v8, (a1) +; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: addi a0, a0, 16 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_32: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: addi a1, a0, 16 ; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 -; RV64-BOTH-NEXT: vse64.v v8, (a1) +; RV64-BOTH-NEXT: vse64.v v8, (a0) +; RV64-BOTH-NEXT: addi a0, a0, 16 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0) @@ -702,27 +702,27 @@ define void @aligned_bzero_66(ptr %a) nounwind { define void @aligned_bzero_96(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_96: ; RV32-BOTH: # %bb.0: +; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-BOTH-NEXT: vmv.v.i v8, 0 +; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: addi a1, a0, 80 ; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; 
RV32-BOTH-NEXT: vmv.v.i v8, 0 ; RV32-BOTH-NEXT: vse64.v v8, (a1) -; RV32-BOTH-NEXT: addi a1, a0, 64 -; RV32-BOTH-NEXT: vse64.v v8, (a1) -; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-BOTH-NEXT: vmv.v.i v8, 0 +; RV32-BOTH-NEXT: addi a0, a0, 64 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_96: ; RV64-BOTH: # %bb.0: +; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-BOTH-NEXT: vmv.v.i v8, 0 +; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: addi a1, a0, 80 ; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 ; RV64-BOTH-NEXT: vse64.v v8, (a1) -; RV64-BOTH-NEXT: addi a1, a0, 64 -; RV64-BOTH-NEXT: vse64.v v8, (a1) -; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-BOTH-NEXT: vmv.v.i v8, 0 +; RV64-BOTH-NEXT: addi a0, a0, 64 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0) @@ -750,19 +750,19 @@ define void @aligned_bzero_128(ptr %a) nounwind { define void @aligned_bzero_256(ptr %a) nounwind { ; RV32-BOTH-LABEL: aligned_bzero_256: ; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: addi a1, a0, 128 ; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-BOTH-NEXT: vmv.v.i v8, 0 -; RV32-BOTH-NEXT: vse64.v v8, (a1) +; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: addi a0, a0, 128 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_bzero_256: ; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: addi a1, a0, 128 ; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BOTH-NEXT: vmv.v.i v8, 0 -; RV64-BOTH-NEXT: vse64.v v8, (a1) +; RV64-BOTH-NEXT: vse64.v v8, (a0) +; RV64-BOTH-NEXT: addi a0, a0, 128 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 256, i1 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index 271dd3d85ecb9..3a9d2556aff63 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -2080,7 +2080,10 @@ define @mgather_baseidx_nxv16i8(ptr %base, ; ; RV64-LABEL: mgather_baseidx_nxv16i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma @@ -2089,11 +2092,6 @@ define @mgather_baseidx_nxv16i8(ptr %base, ; RV64-NEXT: vsext.vf8 v16, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64-NEXT: vmv2r.v v8, v10 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs @@ -2106,49 +2104,45 @@ declare @llvm.masked.gather.nxv32i8.nxv32p0( @mgather_baseidx_nxv32i8(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv32i8: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vsext.vf4 v16, v8 +; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; 
RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v24, v10 -; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; RV32-NEXT: vluxei32.v v14, (a0), v24, v0.t -; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v24, v8 +; RV32-NEXT: vsext.vf4 v16, v10 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu -; RV32-NEXT: vmv1r.v v0, v16 -; RV32-NEXT: vluxei32.v v12, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v14, (a0), v16, v0.t ; RV32-NEXT: vmv4r.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_baseidx_nxv32i8: ; RV64: # %bb.0: ; RV64-NEXT: vmv1r.v v16, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a2, a1, 2 -; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v17, v0, a2 -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v10 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v17 -; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t -; RV64-NEXT: srli a1, a1, 3 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v16, a1 -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a2, a1, 3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: srli a1, a1, 2 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v0, v16, a1 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v16 -; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v17, a1 +; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t +; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a2 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index 08be15cdd05a6..be8281181cd2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -16,11 +16,11 @@ define @reverse_nxv2i1( %a) { ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -33,10 +33,10 @@ define @reverse_nxv2i1( %a) { ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v 
v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -48,10 +48,10 @@ define @reverse_nxv2i1( %a) { ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -63,11 +63,11 @@ define @reverse_nxv2i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -80,10 +80,10 @@ define @reverse_nxv2i1( %a) { ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -95,10 +95,10 @@ define @reverse_nxv2i1( %a) { ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -114,11 +114,11 @@ define @reverse_nxv4i1( %a) { ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -131,10 +131,10 @@ define @reverse_nxv4i1( %a) { ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; 
RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -146,10 +146,10 @@ define @reverse_nxv4i1( %a) { ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -161,11 +161,11 @@ define @reverse_nxv4i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9 @@ -178,10 +178,10 @@ define @reverse_nxv4i1( %a) { ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 @@ -193,10 +193,10 @@ define @reverse_nxv4i1( %a) { ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 @@ -209,86 +209,88 @@ define @reverse_nxv4i1( %a) { define @reverse_nxv8i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv8i1: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v8 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v10 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v10, 0 -; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8 -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v11, 1 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv8i1: ; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV32-BITS-256-NEXT: vmv.v.i v8, 0 +; 
RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: addi a0, a0, -1 -; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV32-BITS-256-NEXT: vid.v v8 -; RV32-BITS-256-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-256-NEXT: vmv.v.i v9, 0 -; RV32-BITS-256-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-BITS-256-NEXT: vrgather.vv v10, v9, v8 +; RV32-BITS-256-NEXT: vid.v v9 +; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 +; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv8i1: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV32-BITS-512-NEXT: vmv.v.i v8, 0 +; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: addi a0, a0, -1 -; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV32-BITS-512-NEXT: vid.v v8 -; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-512-NEXT: vmv.v.i v9, 0 -; RV32-BITS-512-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-BITS-512-NEXT: vrgather.vv v10, v9, v8 +; RV32-BITS-512-NEXT: vid.v v9 +; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 +; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i1: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v8 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v10 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v10, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8 -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v11, 1 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv8i1: ; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV64-BITS-256-NEXT: vmv.v.i v8, 0 +; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: addi a0, a0, -1 -; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV64-BITS-256-NEXT: vid.v v8 -; RV64-BITS-256-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-256-NEXT: vmv.v.i v9, 0 -; RV64-BITS-256-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-BITS-256-NEXT: vrgather.vv v10, v9, v8 +; RV64-BITS-256-NEXT: vid.v v9 +; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 +; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv8i1: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; RV64-BITS-512-NEXT: vmv.v.i v8, 0 +; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: addi a0, a0, -1 -; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; 
RV64-BITS-512-NEXT: vid.v v8 -; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-512-NEXT: vmv.v.i v9, 0 -; RV64-BITS-512-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-BITS-512-NEXT: vrgather.vv v10, v9, v8 +; RV64-BITS-512-NEXT: vid.v v9 +; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 +; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-512-NEXT: ret @@ -302,11 +304,11 @@ define @reverse_nxv16i1( %a) { ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -319,10 +321,10 @@ define @reverse_nxv16i1( %a) { ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v10 ; RV32-BITS-256-NEXT: vand.vi v8, v12, 1 @@ -334,10 +336,10 @@ define @reverse_nxv16i1( %a) { ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: vrsub.vx v10, v10, a0 ; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v10 ; RV32-BITS-512-NEXT: vand.vi v8, v12, 1 @@ -349,11 +351,11 @@ define @reverse_nxv16i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -366,10 +368,10 @@ define @reverse_nxv16i1( %a) { ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10 ; RV64-BITS-256-NEXT: vand.vi v8, v12, 1 @@ -381,10 +383,10 @@ define @reverse_nxv16i1( %a) { ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v10 ; 
RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10 ; RV64-BITS-512-NEXT: vand.vi v8, v12, 1 @@ -400,11 +402,11 @@ define @reverse_nxv32i1( %a) { ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -417,10 +419,10 @@ define @reverse_nxv32i1( %a) { ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v12 ; RV32-BITS-256-NEXT: vand.vi v8, v16, 1 @@ -432,10 +434,10 @@ define @reverse_nxv32i1( %a) { ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v8, 0 ; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-512-NEXT: vrgather.vv v16, v8, v12 ; RV32-BITS-512-NEXT: vand.vi v8, v16, 1 @@ -447,11 +449,11 @@ define @reverse_nxv32i1( %a) { ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -464,10 +466,10 @@ define @reverse_nxv32i1( %a) { ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12 ; RV64-BITS-256-NEXT: vand.vi v8, v16, 1 @@ -479,10 +481,10 @@ define @reverse_nxv32i1( %a) { ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: 
vrsub.vx v12, v12, a0 ; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12 ; RV64-BITS-512-NEXT: vand.vi v8, v16, 1 @@ -495,11 +497,11 @@ define @reverse_nxv32i1( %a) { define @reverse_nxv64i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 @@ -517,10 +519,10 @@ define @reverse_nxv64i1( %a) { ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 3 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: vrsub.vx v16, v16, a0 ; RV32-BITS-256-NEXT: vrgather.vv v24, v8, v16 ; RV32-BITS-256-NEXT: vand.vi v8, v24, 1 @@ -529,11 +531,11 @@ define @reverse_nxv64i1( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv64i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v8 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vid.v v8 ; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-512-NEXT: vmv.v.i v16, 0 @@ -548,11 +550,11 @@ define @reverse_nxv64i1( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 @@ -570,10 +572,10 @@ define @reverse_nxv64i1( %a) { ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16 ; RV64-BITS-256-NEXT: vand.vi v8, v24, 1 @@ -582,11 +584,11 @@ define @reverse_nxv64i1( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv64i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-512-NEXT: vmv.v.i v16, 0 @@ -609,11 +611,11 @@ define @reverse_nxv64i1( %a) { define @reverse_nxv1i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv1i8: ; 
RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 3 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -622,11 +624,11 @@ define @reverse_nxv1i8( %a) { ; ; RV32-BITS-256-LABEL: reverse_nxv1i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 3 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-256-NEXT: vmv1r.v v8, v9 @@ -634,11 +636,11 @@ define @reverse_nxv1i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv1i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 3 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-512-NEXT: vmv1r.v v8, v9 @@ -646,11 +648,11 @@ define @reverse_nxv1i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv1i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -659,11 +661,11 @@ define @reverse_nxv1i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv1i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 3 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-256-NEXT: vmv1r.v v8, v9 @@ -671,11 +673,11 @@ define @reverse_nxv1i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv1i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 3 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-512-NEXT: vmv1r.v v8, v9 @@ -687,11 +689,11 @@ define @reverse_nxv1i8( %a) { define @reverse_nxv2i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv2i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb 
; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -700,11 +702,11 @@ define @reverse_nxv2i8( %a) { ; ; RV32-BITS-256-LABEL: reverse_nxv2i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-256-NEXT: vmv1r.v v8, v9 @@ -712,11 +714,11 @@ define @reverse_nxv2i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv2i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-512-NEXT: vmv1r.v v8, v9 @@ -724,11 +726,11 @@ define @reverse_nxv2i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv2i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -737,11 +739,11 @@ define @reverse_nxv2i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv2i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-256-NEXT: vmv1r.v v8, v9 @@ -749,11 +751,11 @@ define @reverse_nxv2i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv2i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-512-NEXT: vmv1r.v v8, v9 @@ -765,11 +767,11 @@ define @reverse_nxv2i8( %a) { define @reverse_nxv4i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv4i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; 
RV32-BITS-UNKNOWN-NEXT: vid.v v9 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -778,11 +780,11 @@ define @reverse_nxv4i8( %a) { ; ; RV32-BITS-256-LABEL: reverse_nxv4i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: srli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-BITS-256-NEXT: vid.v v9 ; RV32-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-256-NEXT: vmv1r.v v8, v9 @@ -790,11 +792,11 @@ define @reverse_nxv4i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv4i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: srli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-BITS-512-NEXT: vid.v v9 ; RV32-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV32-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV32-BITS-512-NEXT: vmv1r.v v8, v9 @@ -802,11 +804,11 @@ define @reverse_nxv4i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv4i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 @@ -815,11 +817,11 @@ define @reverse_nxv4i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv4i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-256-NEXT: vmv1r.v v8, v9 @@ -827,11 +829,11 @@ define @reverse_nxv4i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv4i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v9, v8, v10 ; RV64-BITS-512-NEXT: vmv1r.v v8, v9 @@ -915,11 +917,11 @@ define @reverse_nxv8i8( %a) { define @reverse_nxv16i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv16i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v12 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; 
RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -928,11 +930,11 @@ define @reverse_nxv16i8( %a) { ; ; RV32-BITS-256-LABEL: reverse_nxv16i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 1 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV32-BITS-256-NEXT: vid.v v10 ; RV32-BITS-256-NEXT: vrsub.vx v12, v10, a0 ; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v12 ; RV32-BITS-256-NEXT: vmv.v.v v8, v10 @@ -940,11 +942,11 @@ define @reverse_nxv16i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv16i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 1 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV32-BITS-512-NEXT: vid.v v10 ; RV32-BITS-512-NEXT: vrsub.vx v12, v10, a0 ; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v12 ; RV32-BITS-512-NEXT: vmv.v.v v8, v10 @@ -952,11 +954,11 @@ define @reverse_nxv16i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12 @@ -965,11 +967,11 @@ define @reverse_nxv16i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv16i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v12 ; RV64-BITS-256-NEXT: vmv.v.v v8, v10 @@ -977,11 +979,11 @@ define @reverse_nxv16i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv16i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v12 ; RV64-BITS-512-NEXT: vmv.v.v v8, v10 @@ -993,11 +995,11 @@ define @reverse_nxv16i8( %a) { define @reverse_nxv32i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv32i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -1006,11 +1008,11 @@ define @reverse_nxv32i8( %a) { ; ; RV32-BITS-256-LABEL: 
reverse_nxv32i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 2 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-256-NEXT: vid.v v12 ; RV32-BITS-256-NEXT: vrsub.vx v16, v12, a0 ; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v16 ; RV32-BITS-256-NEXT: vmv.v.v v8, v12 @@ -1018,11 +1020,11 @@ define @reverse_nxv32i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv32i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vid.v v12 ; RV32-BITS-512-NEXT: vrsub.vx v16, v12, a0 ; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v16 ; RV32-BITS-512-NEXT: vmv.v.v v8, v12 @@ -1030,11 +1032,11 @@ define @reverse_nxv32i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv32i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16 @@ -1043,11 +1045,11 @@ define @reverse_nxv32i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv32i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0 ; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v16 ; RV64-BITS-256-NEXT: vmv.v.v v8, v12 @@ -1055,11 +1057,11 @@ define @reverse_nxv32i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv32i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0 ; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v16 ; RV64-BITS-512-NEXT: vmv.v.v v8, v12 @@ -1071,11 +1073,11 @@ define @reverse_nxv32i8( %a) { define @reverse_nxv64i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 @@ -1085,11 +1087,11 @@ define @reverse_nxv64i8( %a) { ; ; RV32-BITS-256-LABEL: reverse_nxv64i8: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-256-NEXT: vid.v 
v16 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: slli a0, a0, 3 ; RV32-BITS-256-NEXT: addi a0, a0, -1 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV32-BITS-256-NEXT: vmv.v.v v8, v16 @@ -1097,11 +1099,11 @@ define @reverse_nxv64i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv64i8: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v16 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vid.v v16 ; RV32-BITS-512-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV32-BITS-512-NEXT: vrgather.vv v16, v12, v24 @@ -1110,11 +1112,11 @@ define @reverse_nxv64i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 @@ -1124,11 +1126,11 @@ define @reverse_nxv64i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv64i8: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 ; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV64-BITS-256-NEXT: vmv.v.v v8, v16 @@ -1136,11 +1138,11 @@ define @reverse_nxv64i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv64i8: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0 ; RV64-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV64-BITS-512-NEXT: vrgather.vv v16, v12, v24 @@ -1153,11 +1155,11 @@ define @reverse_nxv64i8( %a) { define @reverse_nxv1i16( %a) { ; CHECK-LABEL: reverse_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1169,11 +1171,11 @@ define @reverse_nxv1i16( %a) { define @reverse_nxv2i16( %a) { ; CHECK-LABEL: reverse_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1185,11 
+1187,11 @@ define @reverse_nxv2i16( %a) { define @reverse_nxv4i16( %a) { ; CHECK-LABEL: reverse_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1216,11 +1218,11 @@ define @reverse_nxv8i16( %a) { define @reverse_nxv16i16( %a) { ; CHECK-LABEL: reverse_nxv16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1232,11 +1234,11 @@ define @reverse_nxv16i16( %a) { define @reverse_nxv32i16( %a) { ; CHECK-LABEL: reverse_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1248,11 +1250,11 @@ define @reverse_nxv32i16( %a) { define @reverse_nxv1i32( %a) { ; CHECK-LABEL: reverse_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1264,11 +1266,11 @@ define @reverse_nxv1i32( %a) { define @reverse_nxv2i32( %a) { ; CHECK-LABEL: reverse_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1280,11 +1282,11 @@ define @reverse_nxv2i32( %a) { define @reverse_nxv4i32( %a) { ; CHECK-LABEL: reverse_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1311,11 +1313,11 @@ define @reverse_nxv8i32( %a) { define @reverse_nxv16i32( %a) { ; CHECK-LABEL: reverse_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1327,11 +1329,11 @@ define @reverse_nxv16i32( %a) { define @reverse_nxv1i64( %a) { ; CHECK-LABEL: reverse_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, 
vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1343,11 +1345,11 @@ define @reverse_nxv1i64( %a) { define @reverse_nxv2i64( %a) { ; CHECK-LABEL: reverse_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1359,11 +1361,11 @@ define @reverse_nxv2i64( %a) { define @reverse_nxv4i64( %a) { ; CHECK-LABEL: reverse_nxv4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1394,11 +1396,11 @@ define @reverse_nxv8i64( %a) { define @reverse_nxv1f16( %a) { ; CHECK-LABEL: reverse_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1410,11 +1412,11 @@ define @reverse_nxv1f16( %a) { define @reverse_nxv2f16( %a) { ; CHECK-LABEL: reverse_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1426,11 +1428,11 @@ define @reverse_nxv2f16( %a) { define @reverse_nxv4f16( %a) { ; CHECK-LABEL: reverse_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1457,11 +1459,11 @@ define @reverse_nxv8f16( %a) { define @reverse_nxv16f16( %a) { ; CHECK-LABEL: reverse_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1473,11 +1475,11 @@ define @reverse_nxv16f16( %a) { define @reverse_nxv32f16( %a) { ; CHECK-LABEL: reverse_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; 
CHECK-NEXT: vmv.v.v v8, v16 @@ -1489,11 +1491,11 @@ define @reverse_nxv32f16( %a) { define @reverse_nxv1f32( %a) { ; CHECK-LABEL: reverse_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv1r.v v8, v9 @@ -1505,11 +1507,11 @@ define @reverse_nxv1f32( %a) { define @reverse_nxv2f32( %a) { ; CHECK-LABEL: reverse_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1521,11 +1523,11 @@ define @reverse_nxv2f32( %a) { define @reverse_nxv4f32( %a) { ; CHECK-LABEL: reverse_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1552,11 +1554,11 @@ define @reverse_nxv8f32( %a) { define @reverse_nxv16f32( %a) { ; CHECK-LABEL: reverse_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 @@ -1568,11 +1570,11 @@ define @reverse_nxv16f32( %a) { define @reverse_nxv1f64( %a) { ; CHECK-LABEL: reverse_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vx v10, v9, a0 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 @@ -1584,11 +1586,11 @@ define @reverse_nxv1f64( %a) { define @reverse_nxv2f64( %a) { ; CHECK-LABEL: reverse_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v12, v10, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 @@ -1600,11 +1602,11 @@ define @reverse_nxv2f64( %a) { define @reverse_nxv4f64( %a) { ; CHECK-LABEL: reverse_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v16, v12, a0 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -1633,11 +1635,11 @@ define @reverse_nxv8f64( %a) { define @reverse_nxv3i64( %a) { ; CHECK-LABEL: reverse_nxv3i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; 
CHECK-NEXT: vid.v v12 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v12, v12, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v12 ; CHECK-NEXT: vmv1r.v v8, v17 diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll index cb4b20662e241..1ad8e2d66392a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll @@ -15,8 +15,7 @@ define @foo( %x, @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index 2322256af5f8f..c44f5ebcde482 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -21,8 +21,8 @@ define @foo( %a, @foo( %a, @foo( %a, @foo( %a, @fcmp_ord_vf_nxv1f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -250,8 +250,8 @@ define @fcmp_ord_vf_swap_nxv1f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -500,8 +500,8 @@ define @fcmp_uno_vf_nxv1f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -516,8 +516,8 @@ define @fcmp_uno_vf_swap_nxv1f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 @@ -783,9 +783,9 @@ define @fcmp_ord_vf_nxv8f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v8 +; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v10, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -799,9 +799,9 @@ define @fcmp_ord_vf_swap_nxv8f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v8, v12 +; CHECK-NEXT: vmfeq.vf v12, 
v10, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v12, v10 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1052,9 +1052,9 @@ define @fcmp_uno_vf_nxv8f16( %va, half %b, ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v8 +; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v10, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1068,9 +1068,9 @@ define @fcmp_uno_vf_swap_nxv8f16( %va, half ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v8, v12 +; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v12, v10 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1369,8 +1369,8 @@ define @fcmp_ord_vf_nxv1f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1385,8 +1385,8 @@ define @fcmp_ord_vf_swap_nxv1f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1635,8 +1635,8 @@ define @fcmp_uno_vf_nxv1f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1651,8 +1651,8 @@ define @fcmp_uno_vf_swap_nxv1f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 @@ -1919,9 +1919,9 @@ define @fcmp_ord_vf_nxv8f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v24, v8 +; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer 
@@ -1935,9 +1935,9 @@ define @fcmp_ord_vf_swap_nxv8f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmand.mm v0, v8, v24 +; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v24, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2188,9 +2188,9 @@ define @fcmp_uno_vf_nxv8f64( %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v24, v8 +; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2204,9 +2204,9 @@ define @fcmp_uno_vf_swap_nxv8f64( %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t -; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v8, v24 +; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v24, v16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2239,27 +2239,27 @@ define @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @icmp_eq_vv_nxv128i8( %va, @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv2r.v v16, v10 -; RV32-NEXT: vmv2r.v v12, v8 -; RV32-NEXT: vrgather.vv v8, v12, v20 -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vi v12, v12, 15 +; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vmv2r.v v20, v10 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vrsub.vi v8, v8, 15 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t +; RV32-NEXT: vrgather.vv v12, v20, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: v16i16_2: @@ -276,18 +276,18 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; RV64-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV64-NEXT: li a1, 32 ; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV64-NEXT: vle16.v v20, (a0) -; RV64-NEXT: vmv2r.v v16, v10 -; RV64-NEXT: vmv2r.v v12, v8 -; RV64-NEXT: vrgather.vv v8, v12, v20 -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vi v12, v12, 15 +; RV64-NEXT: vle16.v v16, (a0) +; RV64-NEXT: vmv2r.v v20, v10 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vrsub.vi v8, v8, 15 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t +; RV64-NEXT: vrgather.vv v12, v20, v8, v0.t +; RV64-NEXT: vmv.v.v v8, 
v12 ; RV64-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -497,12 +497,12 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) { ; RV32: # %bb.0: ; RV32-NEXT: vmv2r.v v16, v10 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vid.v v18 -; RV32-NEXT: vrsub.vi v19, v18, 7 +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vi v11, v10, 7 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v8, v19 +; RV32-NEXT: vrgatherei16.vv v12, v8, v11 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vrsub.vi v8, v18, 3 +; RV32-NEXT: vrsub.vi v8, v10, 3 ; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t @@ -791,12 +791,12 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { ; RV32: # %bb.0: ; RV32-NEXT: vmv2r.v v16, v10 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vid.v v18 -; RV32-NEXT: vrsub.vi v19, v18, 7 +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vi v11, v10, 7 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v8, v19 +; RV32-NEXT: vrgatherei16.vv v12, v8, v11 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vrsub.vi v8, v18, 3 +; RV32-NEXT: vrsub.vi v8, v10, 3 ; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll index e5d49f5cad01b..05041632d2926 100644 --- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll @@ -36,16 +36,16 @@ define i32 @splat_vector_split_i64() { ; CHECK-NEXT: vsrl.vx v12, v8, a1 ; CHECK-NEXT: lui a2, 16 ; CHECK-NEXT: addi a2, a2, -256 -; CHECK-NEXT: mv a3, sp -; CHECK-NEXT: vlse64.v v14, (a3), zero ; CHECK-NEXT: vand.vx v12, v12, a2 ; CHECK-NEXT: vor.vv v10, v12, v10 -; CHECK-NEXT: vsrl.vi v12, v8, 8 -; CHECK-NEXT: vand.vv v12, v12, v14 -; CHECK-NEXT: vsrl.vi v16, v8, 24 +; CHECK-NEXT: vsrl.vi v12, v8, 24 +; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: vlse64.v v14, (a3), zero ; CHECK-NEXT: lui a3, 4080 -; CHECK-NEXT: vand.vx v16, v16, a3 -; CHECK-NEXT: vor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v12, v12, a3 +; CHECK-NEXT: vsrl.vi v16, v8, 8 +; CHECK-NEXT: vand.vv v16, v16, v14 +; CHECK-NEXT: vor.vv v12, v16, v12 ; CHECK-NEXT: vor.vv v10, v12, v10 ; CHECK-NEXT: vand.vv v12, v8, v14 ; CHECK-NEXT: vsll.vi v12, v12, 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index f1dd8a3dcddf6..443fe93a618c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -10,17 +10,17 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-LABEL: vec_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vmv.v.x v11, a1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: 
vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) ret <2 x i64> %tmp @@ -30,18 +30,18 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addiw a0, a0, -1 -; CHECK-NEXT: vmv.v.x v11, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: slli a0, a0, 31 -; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -51,16 +51,16 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-LABEL: vec_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -70,16 +70,16 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-LABEL: vec_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v9, v8, v9 -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.v.x v11, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmsne.vv v8, v8, v9 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp @@ -94,15 +94,15 @@ define @vec_nxv2i64( %x, ; CHECK-LABEL: vec_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: srli a1, a0, 1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vmv.v.x v14, a1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0 +; 
CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret @@ -114,16 +114,16 @@ define @vec_nxv4i32( %x, ; CHECK-LABEL: vec_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addiw a0, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a0, a0, -1 -; CHECK-NEXT: vmv.v.x v14, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: slli a0, a0, 31 -; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret @@ -135,12 +135,12 @@ define @vec_nxv8i16( %x, ; CHECK-LABEL: vec_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a1, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 @@ -154,14 +154,14 @@ define @vec_nxv16i8( %x, ; CHECK-LABEL: vec_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.v.x v14, a0 -; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v14, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index 9bd00d33ac7b5..6ce307146be19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -562,9 +562,9 @@ define @stepvector_nxv16i64() { ; ; RV64-LABEL: stepvector_nxv16i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vid.v v8 ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret %v = call @llvm.experimental.stepvector.nxv16i64() @@ -591,11 +591,11 @@ define @add_stepvector_nxv16i64() { ; ; RV64-LABEL: add_stepvector_nxv16i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: @@ -680,15 +680,15 @@ define @mul_bigimm_stepvector_nxv16i64() { ; ; RV64-LABEL: mul_bigimm_stepvector_nxv16i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: lui a1, 1987 +; RV64-NEXT: addiw a1, a1, -731 +; RV64-NEXT: slli a1, a1, 12 +; RV64-NEXT: addi a1, a1, -683 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vid.v v8 -; RV64-NEXT: lui a0, 1987 -; RV64-NEXT: 
addiw a0, a0, -731 -; RV64-NEXT: slli a0, a0, 12 -; RV64-NEXT: addi a0, a0, -683 -; RV64-NEXT: vmul.vx v8, v8, a0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: mul a0, a1, a0 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: @@ -719,11 +719,11 @@ define @shl_stepvector_nxv16i64() { ; ; RV64-LABEL: shl_stepvector_nxv16i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vsll.vi v8, v8, 2 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 63873a115d5ce..718e68505a93a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -771,17 +771,17 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @llvm.experimental.vp.strided.load.nxv16f64.p0.i6 define @strided_load_nxv17f64(ptr %ptr, i64 %stride, %mask, i32 zeroext %evl, * %hi_ptr) { ; CHECK-RV32-LABEL: strided_load_nxv17f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: csrr a5, vlenb -; CHECK-RV32-NEXT: slli a7, a5, 1 +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a7, a2, 1 ; CHECK-RV32-NEXT: vmv1r.v v8, v0 -; CHECK-RV32-NEXT: mv a2, a3 +; CHECK-RV32-NEXT: mv a6, a3 ; CHECK-RV32-NEXT: bltu a3, a7, .LBB44_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a2, a7 +; CHECK-RV32-NEXT: mv a6, a7 ; CHECK-RV32-NEXT: .LBB44_2: -; CHECK-RV32-NEXT: sub a6, a2, a5 -; CHECK-RV32-NEXT: sltu t0, a2, a6 +; CHECK-RV32-NEXT: sub a5, a6, a2 +; CHECK-RV32-NEXT: sltu t0, a6, a5 ; CHECK-RV32-NEXT: addi t0, t0, -1 -; CHECK-RV32-NEXT: and t0, t0, a6 -; CHECK-RV32-NEXT: srli a6, a5, 3 -; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a6 -; CHECK-RV32-NEXT: mv a6, a2 -; CHECK-RV32-NEXT: bltu a2, a5, .LBB44_4 +; CHECK-RV32-NEXT: and t0, t0, a5 +; CHECK-RV32-NEXT: mv a5, a6 +; CHECK-RV32-NEXT: bltu a6, a2, .LBB44_4 ; CHECK-RV32-NEXT: # %bb.3: -; CHECK-RV32-NEXT: mv a6, a5 +; CHECK-RV32-NEXT: mv a5, a2 ; CHECK-RV32-NEXT: .LBB44_4: -; CHECK-RV32-NEXT: mul t1, a6, a1 +; CHECK-RV32-NEXT: mul t1, a5, a1 ; CHECK-RV32-NEXT: add t1, a0, t1 +; CHECK-RV32-NEXT: srli t2, a2, 3 +; CHECK-RV32-NEXT: vsetvli t3, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v8, t2 ; CHECK-RV32-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v16, (t1), a1, v0.t ; CHECK-RV32-NEXT: sub a7, a3, a7 ; CHECK-RV32-NEXT: sltu a3, a3, a7 ; CHECK-RV32-NEXT: addi a3, a3, -1 ; CHECK-RV32-NEXT: and a3, a3, a7 -; CHECK-RV32-NEXT: bltu a3, a5, .LBB44_6 +; CHECK-RV32-NEXT: bltu a3, a2, .LBB44_6 ; CHECK-RV32-NEXT: # %bb.5: -; CHECK-RV32-NEXT: mv a3, a5 +; CHECK-RV32-NEXT: mv a3, a2 ; CHECK-RV32-NEXT: .LBB44_6: -; CHECK-RV32-NEXT: srli a5, a5, 2 +; CHECK-RV32-NEXT: mul a6, a6, a1 +; CHECK-RV32-NEXT: add a6, a0, a6 +; CHECK-RV32-NEXT: srli a2, a2, 2 ; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5 -; CHECK-RV32-NEXT: mul a2, a2, a1 -; CHECK-RV32-NEXT: add a2, a0, a2 +; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a2 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a2), a1, v0.t -; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-RV32-NEXT: vlse64.v v24, (a6), a1, 
v0.t +; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV32-NEXT: vs1r.v v24, (a4) @@ -914,46 +914,46 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, %v, ptr %ptr, i32 signext %stride, %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_store_nxv16f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: mv a3, a2 -; CHECK-RV32-NEXT: bltu a2, a4, .LBB34_2 +; CHECK-RV32-NEXT: csrr a3, vlenb +; CHECK-RV32-NEXT: mv a4, a2 +; CHECK-RV32-NEXT: bltu a2, a3, .LBB34_2 ; CHECK-RV32-NEXT: # %bb.1: -; CHECK-RV32-NEXT: mv a3, a4 +; CHECK-RV32-NEXT: mv a4, a3 ; CHECK-RV32-NEXT: .LBB34_2: -; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV32-NEXT: sub a5, a2, a4 +; CHECK-RV32-NEXT: sub a5, a2, a3 ; CHECK-RV32-NEXT: sltu a2, a2, a5 ; CHECK-RV32-NEXT: addi a2, a2, -1 ; CHECK-RV32-NEXT: and a2, a2, a5 -; CHECK-RV32-NEXT: srli a4, a4, 3 -; CHECK-RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-RV32-NEXT: mul a3, a3, a1 -; CHECK-RV32-NEXT: add a0, a0, a3 +; CHECK-RV32-NEXT: mul a4, a4, a1 +; CHECK-RV32-NEXT: add a0, a0, a4 +; CHECK-RV32-NEXT: srli a3, a3, 3 +; CHECK-RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_store_nxv16f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: mv a3, a2 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB34_2 +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: mv a4, a2 +; CHECK-RV64-NEXT: bltu a2, a3, .LBB34_2 ; CHECK-RV64-NEXT: # %bb.1: -; CHECK-RV64-NEXT: mv a3, a4 +; CHECK-RV64-NEXT: mv a4, a3 ; CHECK-RV64-NEXT: .LBB34_2: -; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v8, (a0), a1, v0.t -; CHECK-RV64-NEXT: sub a5, a2, a4 +; CHECK-RV64-NEXT: sub a5, a2, a3 ; CHECK-RV64-NEXT: sltu a2, a2, a5 ; CHECK-RV64-NEXT: addi a2, a2, -1 ; CHECK-RV64-NEXT: and a2, a2, a5 -; CHECK-RV64-NEXT: srli a4, a4, 3 -; CHECK-RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a4 -; CHECK-RV64-NEXT: mul a3, a3, a1 -; CHECK-RV64-NEXT: add a0, a0, a3 +; CHECK-RV64-NEXT: mul a4, a4, a1 +; CHECK-RV64-NEXT: add a0, a0, a4 +; CHECK-RV64-NEXT: srli a3, a3, 3 +; CHECK-RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-RV64-NEXT: ret @@ -749,11 +749,11 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-RV32-NEXT: sltu t0, a5, a0 ; CHECK-RV32-NEXT: addi t0, t0, -1 ; CHECK-RV32-NEXT: and a0, t0, a0 +; CHECK-RV32-NEXT: mul a7, a7, a2 +; CHECK-RV32-NEXT: add a7, a1, a7 ; CHECK-RV32-NEXT: srli t0, a4, 3 ; CHECK-RV32-NEXT: vsetvli t1, zero, e8, mf4, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v24, t0 -; CHECK-RV32-NEXT: mul a7, a7, a2 -; CHECK-RV32-NEXT: add a7, a1, a7 ; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV32-NEXT: sub a0, a3, a6 ; CHECK-RV32-NEXT: sltu a3, a3, a0 @@ -764,11 +764,11 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-RV32-NEXT: # %bb.5: ; CHECK-RV32-NEXT: mv a0, a4 ; 
CHECK-RV32-NEXT: .LBB36_6: +; CHECK-RV32-NEXT: mul a3, a5, a2 +; CHECK-RV32-NEXT: add a1, a1, a3 ; CHECK-RV32-NEXT: srli a4, a4, 2 ; CHECK-RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-RV32-NEXT: mul a3, a5, a2 -; CHECK-RV32-NEXT: add a1, a1, a3 ; CHECK-RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -810,11 +810,11 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-RV64-NEXT: sltu t0, a5, a0 ; CHECK-RV64-NEXT: addi t0, t0, -1 ; CHECK-RV64-NEXT: and a0, t0, a0 +; CHECK-RV64-NEXT: mul a7, a7, a2 +; CHECK-RV64-NEXT: add a7, a1, a7 ; CHECK-RV64-NEXT: srli t0, a4, 3 ; CHECK-RV64-NEXT: vsetvli t1, zero, e8, mf4, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v24, t0 -; CHECK-RV64-NEXT: mul a7, a7, a2 -; CHECK-RV64-NEXT: add a7, a1, a7 ; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV64-NEXT: sub a0, a3, a6 ; CHECK-RV64-NEXT: sltu a3, a3, a0 @@ -825,11 +825,11 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-RV64-NEXT: # %bb.5: ; CHECK-RV64-NEXT: mv a0, a4 ; CHECK-RV64-NEXT: .LBB36_6: +; CHECK-RV64-NEXT: mul a3, a5, a2 +; CHECK-RV64-NEXT: add a1, a1, a3 ; CHECK-RV64-NEXT: srli a4, a4, 2 ; CHECK-RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-RV64-NEXT: mul a3, a5, a2 -; CHECK-RV64-NEXT: add a1, a1, a3 ; CHECK-RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index 5348eb38874eb..0dd57e1be277b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -107,16 +107,14 @@ ret {<4 x i32>, <4 x i32>} %retval define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) { ; CHECK-LABEL: vector_deinterleave_v2i64_v4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vrgather.vi v10, v12, 1, v0.t -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: vmv.v.v v9, v10 +; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret %retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec) ret {<2 x i64>, <2 x i64>} %retval @@ -196,16 +194,14 @@ ret {<4 x float>, <4 x float>} %retval define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) { ; CHECK-LABEL: vector_deinterleave_v2f64_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vrgather.vi v10, v8, 1 -; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vrgather.vi v10, v12, 1, v0.t -; 
CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: vmv.v.v v9, v10 +; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret %retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec) ret {<2 x double>, <2 x double>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index dd3466c91ad86..dde54d36d55a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -104,51 +104,74 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: li a2, 40 ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re64.v v16, (a1) -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v0, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v16, v0 +; CHECK-NEXT: vl8re64.v v24, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v16, v8, v8 +; CHECK-NEXT: vrgather.vv v8, v0, v16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vrgather.vv v8, v24, v0 -; CHECK-NEXT: vadd.vi v0, v0, 1 -; CHECK-NEXT: vrgather.vv v24, v16, v0 +; CHECK-NEXT: vrgather.vv v0, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vadd.vi v24, v16, 1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v12, v24 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v16, v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 
; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v24, v0, v8 +; CHECK-NEXT: vmv4r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v20, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v12, v24 +; CHECK-NEXT: vmv4r.v v20, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 7ad1e1fdf4244..b6cb7f9f5ff10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -7,16 +7,15 @@ define {, } @vector_deinterleave_nxv16i1_nxv32i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 @@ -91,39 +90,24 @@ declare {, } @llvm.experimental.vector.deint define {, } @vector_deinterleave_nxv64i1_nxv128i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v28, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 +; CHECK-NEXT: vnsrl.wi v24, v16, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v28 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0 +; CHECK-NEXT: vnsrl.wi v28, v8, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v4, v16, 8 -; CHECK-NEXT: vnsrl.wi v0, v24, 8 +; CHECK-NEXT: vnsrl.wi v24, v16, 8 +; CHECK-NEXT: vnsrl.wi v28, v8, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v0, 0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli 
a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmsne.vi v8, v24, 0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i1( %vec) ret {, } %retval @@ -134,10 +118,10 @@ define {, } @vector_deinterleave_nxv64i8_nxv ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 8 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v0, v24, 8 +; CHECK-NEXT: vnsrl.wi v4, v16, 8 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i8( %vec) @@ -149,10 +133,10 @@ define {, } @vector_deinterleave_nxv32i16_ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vnsrl.wi v4, v16, 16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64i16( %vec) @@ -167,8 +151,8 @@ define {, } @vector_deinterleave_nxv16i32_ ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wx v20, v24, a0 ; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vnsrl.wi v0, v8, 0 +; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32i32( %vec) @@ -355,10 +339,10 @@ define {, } @vector_deinterleave_nxv32f1 ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vnsrl.wi v4, v16, 16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64f16( %vec) @@ -373,8 +357,8 @@ define {, } @vector_deinterleave_nxv16 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wx v20, v24, a0 ; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vnsrl.wi v0, v8, 0 +; CHECK-NEXT: vnsrl.wi v4, v24, 0 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32f32( %vec) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll index f6e33cb8ff1ef..02927c60dc4dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -7,13 +7,12 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) { ; CHECK-LABEL: vector_interleave_v32i1_v16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll 
b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index 65150e7d1d528..c23c10205e6e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -106,13 +106,13 @@ define void @vector_interleave_store_nxv16i64_nxv8i64( %a, @vector_interleave_nxv8i32_nxv4i32( define @vector_interleave_nxv4i64_nxv2i64( %a, %b) { ; CHECK-LABEL: vector_interleave_nxv4i64_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vsrl.vi v16, v12, 1 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vand.vi v13, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v13, 0 +; CHECK-NEXT: vsrl.vi v16, v12, 1 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -177,13 +177,13 @@ define @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv8f32_nxv4f32( @vector_interleave_nxv4f64_nxv2f64( %a, %b) { ; CHECK-LABEL: vector_interleave_nxv4f64_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vsrl.vi v16, v12, 1 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vand.vi v13, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v13, 0 +; CHECK-NEXT: vsrl.vi v16, v12, 1 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -355,13 +355,13 @@ define @vector_interleave_nxv16f64_nxv8f64( @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v0, a3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: slli a3, a1, 1 -; CHECK-NEXT: sub a4, a2, a3 -; CHECK-NEXT: sltu a5, a2, a4 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: sub a5, a4, a1 -; CHECK-NEXT: sltu a6, a4, a5 +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v16, v0, a4 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re64.v v8, (a4) +; CHECK-NEXT: slli a4, a1, 1 +; CHECK-NEXT: sub a5, a2, a4 +; CHECK-NEXT: sltu a6, a2, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, a5 -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vslidedown.vx v0, v16, a5 +; CHECK-NEXT: and a5, a6, a5 +; CHECK-NEXT: sub a6, a5, a1 +; CHECK-NEXT: sltu a7, a5, a6 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a6, a7, a6 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vslidedown.vx v0, v16, a3 ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a4, a1, .LBB8_2 +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t +; CHECK-NEXT: bltu a5, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a1 +; CHECK-NEXT: mv a5, a1 ; CHECK-NEXT: .LBB8_2: ; 
CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v1, a5 -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vx v2, v1, a3 +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a3, .LBB8_4 +; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t +; CHECK-NEXT: bltu a2, a4, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: mv a2, a4 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll index 6ea27eb4e6350..f07a1e8997a2a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll @@ -14,8 +14,7 @@ define i64 @test_vleff_nxv8i8( *%p, i64 %vl) { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[DEF]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) + ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 $noreg, [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) ; CHECK-NEXT: $x10 = COPY [[PseudoVLE8FF_V_M1_1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index fe5950b4c6de1..85a0509ba662d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -283,60 +283,59 @@ define @vpgather_baseidx_nxv32i8(ptr %base, %idxs %v = call @llvm.vp.gather.nxv32i8.nxv32p0( %ptrs, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index ead578865a904..acca2d2f40001 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -459,11 +459,11 @@ define @vpload_nxv16f64(* %ptr, @vpload_nxv17f64(* %ptr, @vpload_nxv17f64(* %ptr, @vpmerge_vv_nxv128i8( %va, %val, %val, %val, ) define half @vreduce_ord_fadd_nxv3f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv3f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: slli a1, a0, 1 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a2 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -910,16 +910,16 @@ declare half @llvm.vector.reduce.fadd.nxv6f16(half, ) define half @vreduce_ord_fadd_nxv6f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv6f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, 
tu, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 +; CHECK-NEXT: vslideup.vx v9, v10, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -937,12 +937,13 @@ define half @vreduce_ord_fadd_nxv10f16( %v, half %s) { ; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.v v11, v12 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v12 @@ -957,10 +958,10 @@ declare half @llvm.vector.reduce.fadd.nxv12f16(half, ) define half @vreduce_ord_fadd_nxv12f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv12f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -973,14 +974,14 @@ define half @vreduce_ord_fadd_nxv12f16( %v, half %s) { define half @vreduce_fadd_nxv3f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv3f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: slli a1, a0, 1 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a2 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -995,16 +996,16 @@ define half @vreduce_fadd_nxv3f16( %v, half %s) { define half @vreduce_fadd_nxv6f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv6f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.v.x v11, a0 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 +; CHECK-NEXT: vslideup.vx v9, v10, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1017,18 +1018,19 @@ declare half @llvm.vector.reduce.fmin.nxv10f16() define half @vreduce_fmin_nxv10f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI73_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v12, (a0), zero ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: lui a1, %hi(.LCPI73_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v12, (a1), zero ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a1, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: 
vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vmv.v.v v11, v12 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 75156d69cd6e1..13fd277e00f39 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -93,25 +93,24 @@ declare half @llvm.vp.reduce.fadd.nxv64f16(half, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a1, a2, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a2 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB6_2 +; CHECK-NEXT: vslidedown.vx v24, v0, a1 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: bltu a0, a2, .LBB6_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 @@ -123,25 +122,24 @@ define half @vpreduce_fadd_nxv64f16(half %s, %v, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a1, a2, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a2 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: vslidedown.vx v24, v0, a1 +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t -; CHECK-NEXT: sub a1, a0, a1 -; CHECK-NEXT: sltu a0, a0, a1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v25 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll index 0af96e513bb05..666f77fd6152d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1153,25 +1153,24 @@ declare i32 @llvm.vp.reduce.umax.nxv32i32(i32, , %v, %m, i32 zeroext %evl) { ; RV32-LABEL: vpreduce_umax_nxv32i32: ; RV32: # %bb.0: -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: srli a3, a2, 2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: srli a2, a3, 2 ; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a3 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: vmv.s.x v25, a0 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bltu a1, a2, .LBB67_2 +; RV32-NEXT: vslidedown.vx v24, v0, a2 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: sub a2, a1, a3 +; RV32-NEXT: sltu a4, a1, a2 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: bltu a1, a3, .LBB67_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB67_2: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v25, a0 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV32-NEXT: sub a0, a1, a2 -; RV32-NEXT: sltu a1, a1, a0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vredmaxu.vs v25, v16, v25, v0.t ; RV32-NEXT: vmv.x.s a0, v25 @@ -1179,26 +1178,25 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, % ; ; RV64-LABEL: vpreduce_umax_nxv32i32: ; RV64: # %bb.0: -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: srli a3, a2, 2 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: srli a2, a3, 2 ; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a3 -; RV64-NEXT: slli a3, a0, 32 -; RV64-NEXT: slli a0, a2, 1 -; RV64-NEXT: srli a3, a3, 32 -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: bltu a1, a0, .LBB67_2 +; RV64-NEXT: vslidedown.vx v24, v0, a2 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a2, a0, 32 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: sub a0, a1, a3 +; RV64-NEXT: sltu a4, a1, a0 +; RV64-NEXT: addi a4, a4, -1 +; RV64-NEXT: and a0, a4, a0 +; RV64-NEXT: bltu a1, a3, .LBB67_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a2, a0 +; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v25, a3 -; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV64-NEXT: vmv.s.x v25, a2 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: sltu a1, a1, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vredmaxu.vs v25, v16, v25, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll index 034a122bbed70..aa671f9ca3774 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -1230,11 +1230,11 @@ define i64 @vreduce_umax_nxv1i64( %v) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmaxu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; 
RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_nxv1i64: @@ -1254,11 +1254,11 @@ define i64 @vreduce_smax_nxv1i64( %v) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_nxv1i64: @@ -1278,11 +1278,11 @@ define i64 @vreduce_umin_nxv1i64( %v) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_nxv1i64: @@ -1302,11 +1302,11 @@ define i64 @vreduce_smin_nxv1i64( %v) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_nxv1i64: @@ -1326,11 +1326,11 @@ define i64 @vreduce_and_nxv1i64( %v) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredand.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_nxv1i64: @@ -1350,11 +1350,11 @@ define i64 @vreduce_or_nxv1i64( %v) { ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredor.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 +; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v8, a1 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_nxv1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll index ed7f1fda1bcb1..b2663549e1172 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll @@ -1664,16 +1664,16 @@ define @vror_vx_nxv1i64( %a, i64 %b) { define @vror_vi_nxv1i64( %a) { ; CHECK-RV32-LABEL: vror_vi_nxv1i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v9, 1 +; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v9, a0 -; CHECK-RV32-NEXT: vand.vi v9, v9, 1 -; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 -; CHECK-RV32-NEXT: vmv.v.i v10, 1 -; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v9, v8 +; CHECK-RV32-NEXT: vand.vx 
v9, v9, a0 +; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_nxv1i64: @@ -1697,16 +1697,16 @@ define @vror_vi_nxv1i64( %a) { define @vror_vi_rotl_nxv1i64( %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_nxv1i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v9, 1 +; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v9, a0 -; CHECK-RV32-NEXT: vand.vi v9, v9, 1 -; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 -; CHECK-RV32-NEXT: vmv.v.i v10, 1 -; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v9, v8 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_nxv1i64: @@ -1810,16 +1810,16 @@ define @vror_vx_nxv2i64( %a, i64 %b) { define @vror_vi_nxv2i64( %a) { ; CHECK-RV32-LABEL: vror_vi_nxv2i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 1 +; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vmv.v.i v12, 1 -; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_nxv2i64: @@ -1843,16 +1843,16 @@ define @vror_vi_nxv2i64( %a) { define @vror_vi_rotl_nxv2i64( %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_nxv2i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 1 +; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vmv.v.i v12, 1 -; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_nxv2i64: @@ -1956,16 +1956,16 @@ define @vror_vx_nxv4i64( %a, i64 %b) { define @vror_vi_nxv4i64( %a) { ; CHECK-RV32-LABEL: vror_vi_nxv4i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v12, 1 +; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, 
e64, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV32-NEXT: vmv.v.i v16, 1 -; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0 -; CHECK-RV32-NEXT: vand.vx v16, v16, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-RV32-NEXT: vand.vi v16, v16, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v8, v12 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_nxv4i64: @@ -1989,16 +1989,16 @@ define @vror_vi_nxv4i64( %a) { define @vror_vi_rotl_nxv4i64( %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_nxv4i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v12, 1 +; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV32-NEXT: vmv.v.i v16, 1 -; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0 -; CHECK-RV32-NEXT: vand.vx v16, v16, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-RV32-NEXT: vand.vi v16, v16, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v8, v12 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_nxv4i64: @@ -2102,16 +2102,16 @@ define @vror_vx_nxv8i64( %a, i64 %b) { define @vror_vi_nxv8i64( %a) { ; CHECK-RV32-LABEL: vror_vi_nxv8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v16, 1 +; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v16, a0 -; CHECK-RV32-NEXT: vand.vi v16, v16, 1 -; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16 -; CHECK-RV32-NEXT: vmv.v.i v24, 1 -; CHECK-RV32-NEXT: vrsub.vi v24, v24, 0 -; CHECK-RV32-NEXT: vand.vx v24, v24, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v24 -; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: vand.vx v16, v16, a0 +; CHECK-RV32-NEXT: vsll.vv v16, v8, v16 +; CHECK-RV32-NEXT: vmv.v.x v24, a0 +; CHECK-RV32-NEXT: vand.vi v24, v24, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v24 +; CHECK-RV32-NEXT: vor.vv v8, v8, v16 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_nxv8i64: @@ -2135,16 +2135,16 @@ define @vror_vi_nxv8i64( %a) { define @vror_vi_rotl_nxv8i64( %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_nxv8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v16, 1 +; CHECK-RV32-NEXT: vrsub.vi v16, v16, 0 ; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v16, a0 -; CHECK-RV32-NEXT: vand.vi v16, v16, 1 -; CHECK-RV32-NEXT: vsll.vv v16, v8, v16 -; CHECK-RV32-NEXT: vmv.v.i v24, 1 -; CHECK-RV32-NEXT: vrsub.vi v24, v24, 0 -; CHECK-RV32-NEXT: vand.vx v24, v24, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v24 -; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: vand.vx v16, v16, a0 +; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16 +; CHECK-RV32-NEXT: vmv.v.x v24, a0 +; CHECK-RV32-NEXT: vand.vi v24, v24, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v24 +; CHECK-RV32-NEXT: vor.vv v8, v8, v16 ; CHECK-RV32-NEXT: ret ; ; 
CHECK-RV64-LABEL: vror_vi_rotl_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index 3ce9147622749..a624a42b3873b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -297,39 +297,39 @@ define @vtrunc_nxv32i64_nxv32i32( %a,