diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 4f2e633c1c524..c252875f05dca 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -100,7 +100,7 @@ class RISCVInstructionSelector : public InstructionSelector {
   bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                       MachineIRBuilder &MIB) const;
   bool selectExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
-
+  bool selectInsertSubVector(MachineInstr &I, MachineIRBuilder &MIB) const;
   ComplexRendererFns selectShiftMask(MachineOperand &Root,
                                      unsigned ShiftWidth) const;
   ComplexRendererFns selectShiftMaskXLen(MachineOperand &Root) const {
@@ -1007,6 +1007,66 @@ bool RISCVInstructionSelector::selectExtractSubvector(
   return true;
 }
 
+bool RISCVInstructionSelector::selectInsertSubVector(
+    MachineInstr &MI, MachineIRBuilder &MIB) const {
+  assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register VecReg = MI.getOperand(1).getReg();
+  Register SubVecReg = MI.getOperand(2).getReg();
+
+  LLT VecTy = MRI->getType(VecReg);
+  LLT SubVecTy = MRI->getType(SubVecReg);
+
+  MVT VecMVT = getMVTForLLT(VecTy);
+  MVT SubVecMVT = getMVTForLLT(SubVecTy);
+
+  unsigned Idx = static_cast<unsigned>(MI.getOperand(3).getImm());
+
+  unsigned SubRegIdx;
+  std::tie(SubRegIdx, Idx) =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          VecMVT, SubVecMVT, Idx, &TRI);
+
+  // If the Idx hasn't been completely eliminated then this is a subvector
+  // insert which doesn't naturally align to a vector register. These must
+  // be handled using instructions to manipulate the vector registers.
+  if (Idx != 0)
+    return false;
+
+  RISCVVType::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecMVT);
+  [[maybe_unused]] bool IsSubVecPartReg =
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
+
+  // Constrain dst
+  unsigned DstRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VecMVT);
+  const TargetRegisterClass *DstRC = TRI.getRegClass(DstRegClassID);
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+    return false;
+
+  // If we haven't set a SubRegIdx, then we must be going between
+  // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
+  if (SubRegIdx == RISCV::NoSubRegister) {
+    assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
+               DstRegClassID &&
+           "Unexpected subvector insert");
+    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Use INSERT_SUBREG to insert the subvector into the vector at the
+  // appropriate subregister index.
+  auto Ins =
+      MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
+          .addImm(SubRegIdx);
+
+  MI.eraseFromParent();
+  return constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+}
+
 bool RISCVInstructionSelector::select(MachineInstr &MI) {
   MachineIRBuilder MIB(MI);
 
@@ -1281,6 +1341,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
     return selectIntrinsicWithSideEffects(MI, MIB);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
     return selectExtractSubvector(MI, MIB);
+  case TargetOpcode::G_INSERT_SUBVECTOR:
+    return selectInsertSubVector(MI, MIB);
   default:
     return false;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7cbb9c0da4874..f56922828cb41 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25587,7 +25587,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
       return false;
     }
 
-    if (II->getIntrinsicID() == Intrinsic::vector_extract)
+    if (II->getIntrinsicID() == Intrinsic::vector_extract ||
+        II->getIntrinsicID() == Intrinsic::vector_insert)
       return false;
   }
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
new file mode 100644
index 0000000000000..571e3bd25d1b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
@@ -0,0 +1,411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v11, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_0(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv4r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv4r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v13, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v15, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_6(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_0(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 1)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 2)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v9, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_0(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 0)
+  ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
+  ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_26:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v14, v16, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)
+  ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_0(<vscale x 1 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_undef_nxv1f16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv1f16.nxv32f16(<vscale x 32 x half> poison, <vscale x 1 x half> %subvec, i64 0)
+  ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_26(<vscale x 1 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_undef_nxv1f16_26:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v14, v8, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv1f16.nxv32f16(<vscale x 32 x half> poison, <vscale x 1 x half> %subvec, i64 26)
+  ret <vscale x 32 x half> %v
+}