
Conversation

@jacquesguan (Contributor)

No description provided.

@llvmbot (Member) commented Dec 8, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Jianjian Guan (jacquesguan)

Changes

Patch is 22.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171092.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp (+63-1)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-1)
  • (added) llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll (+411)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 4f2e633c1c524..c252875f05dca 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -100,7 +100,7 @@ class RISCVInstructionSelector : public InstructionSelector {
   bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                       MachineIRBuilder &MIB) const;
   bool selectExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
-
+  bool selectInsertSubVector(MachineInstr &I, MachineIRBuilder &MIB) const;
   ComplexRendererFns selectShiftMask(MachineOperand &Root,
                                      unsigned ShiftWidth) const;
   ComplexRendererFns selectShiftMaskXLen(MachineOperand &Root) const {
@@ -1007,6 +1007,66 @@ bool RISCVInstructionSelector::selectExtractSubvector(
   return true;
 }
 
+bool RISCVInstructionSelector::selectInsertSubVector(
+    MachineInstr &MI, MachineIRBuilder &MIB) const {
+  assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register VecReg = MI.getOperand(1).getReg();
+  Register SubVecReg = MI.getOperand(2).getReg();
+
+  LLT VecTy = MRI->getType(VecReg);
+  LLT SubVecTy = MRI->getType(SubVecReg);
+
+  MVT VecMVT = getMVTForLLT(VecTy);
+  MVT SubVecMVT = getMVTForLLT(SubVecTy);
+
+  unsigned Idx = static_cast<unsigned>(MI.getOperand(3).getImm());
+
+  unsigned SubRegIdx;
+  std::tie(SubRegIdx, Idx) =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          VecMVT, SubVecMVT, Idx, &TRI);
+
+  // If the Idx hasn't been completely eliminated then this is a subvector
+  // insert which doesn't naturally align to a vector register. These must
+  // be handled using instructions to manipulate the vector registers.
+  if (Idx != 0)
+    return false;
+
+  RISCVVType::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecMVT);
+  [[maybe_unused]] bool IsSubVecPartReg =
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
+
+  // Constrain dst
+  unsigned DstRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VecMVT);
+  const TargetRegisterClass *DstRC = TRI.getRegClass(DstRegClassID);
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+    return false;
+
+  // If we haven't set a SubRegIdx, then we must be going between
+  // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
+  if (SubRegIdx == RISCV::NoSubRegister) {
+    assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
+               DstRegClassID &&
+           "Unexpected subvector insert");
+    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Use INSERT_SUBREG to insert the subvector into the vector at the
+  // appropriate subregister index.
+  auto Ins =
+      MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
+          .addImm(SubRegIdx);
+
+  MI.eraseFromParent();
+  return constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+}
+
 bool RISCVInstructionSelector::select(MachineInstr &MI) {
   MachineIRBuilder MIB(MI);
 
@@ -1281,6 +1341,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
     return selectIntrinsicWithSideEffects(MI, MIB);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
     return selectExtractSubvector(MI, MIB);
+  case TargetOpcode::G_INSERT_SUBVECTOR:
+    return selectInsertSubVector(MI, MIB);
   default:
     return false;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a6eb225e24609..c79f9ce338c80 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25580,7 +25580,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
 
       return false;
     }
-    if (II->getIntrinsicID() == Intrinsic::vector_extract)
+    if (II->getIntrinsicID() == Intrinsic::vector_extract ||
+        II->getIntrinsicID() == Intrinsic::vector_insert)
       return false;
   }
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
new file mode 100644
index 0000000000000..9c83c6fe2ba27
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
@@ -0,0 +1,411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v11, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_0(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv4r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv4r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v13, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v15, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_6(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_0(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 1)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 2)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v9, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_0(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x h...
[truncated]

@llvmbot (Member) commented Dec 8, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Jianjian Guan (jacquesguan)

Changes

Patch is 22.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171092.diff

(Same patch summary and truncated diff as in the previous comment.)

@github-actions bot commented Dec 8, 2025

🐧 Linux x64 Test Results

  • 187214 tests passed
  • 4941 tests skipped

✅ The build succeeded and all tests passed.

@topperc (Collaborator) commented Dec 8, 2025

Looks like you have a failing test

    assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
               DstRegClassID &&
           "Unexpected subvector insert");
    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
Contributor

Selector should not be using MachineInstrBuilder, and use direct BuildMI calls
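
For reference, a minimal sketch of the two builder styles under discussion, assuming the selectInsertSubVector() context from the patch (MI, MIB, TII, DstReg, VecReg, SubVecReg, and SubRegIdx as defined above); the BuildMI form is an illustrative rewrite, not the author's code:

    // Style used in the patch: build through GlobalISel's MachineIRBuilder,
    // which inserts at MIB's current insertion point.
    auto InsViaMIB =
        MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
            .addImm(SubRegIdx);

    // Style the reviewer suggests: call BuildMI (llvm/CodeGen/MachineInstrBuilder.h)
    // directly, inserting before MI in its parent block and reusing MI's debug
    // location. Both forms return a MachineInstrBuilder, so the addReg/addImm
    // chaining and the later constrainSelectedInstRegOperands() call are unchanged.
    auto InsViaBuildMI =
        BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
                TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
            .addReg(VecReg)
            .addReg(SubVecReg)
            .addImm(SubRegIdx);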

Contributor Author

It seems we only use MachineInstrBuilder in RISCVInstructionSelector.cpp; I think we could refactor them all in a new PR.

Comment on lines 2 to 3
; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
Contributor

Suggested change (replace the two RUN lines above with):

; RUN: llc -global-isel -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v,+zvfbfmin < %s | FileCheck %s
; RUN: llc -global-isel -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v,+zvfbfmin < %s | FileCheck %s

Contributor Author

Added =.

jacquesguan force-pushed the riscv-gisel-select-insert-subvector branch from 081176d to 7969b4e on December 10, 2025, 06:28
jacquesguan requested a review from topperc on December 10, 2025, 08:32
@jacquesguan (Contributor Author), replying to "Looks like you have a failing test":

OK now after rebasing with #171091.
