
Conversation

@jacquesguan (Contributor)

No description provided.

@llvmbot (Member) commented Dec 8, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Jianjian Guan (jacquesguan)

Changes

Patch is 22.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171092.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp (+63-1)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-1)
  • (added) llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll (+411)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 4f2e633c1c524..c252875f05dca 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -100,7 +100,7 @@ class RISCVInstructionSelector : public InstructionSelector {
   bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                       MachineIRBuilder &MIB) const;
   bool selectExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
-
+  bool selectInsertSubVector(MachineInstr &I, MachineIRBuilder &MIB) const;
   ComplexRendererFns selectShiftMask(MachineOperand &Root,
                                      unsigned ShiftWidth) const;
   ComplexRendererFns selectShiftMaskXLen(MachineOperand &Root) const {
@@ -1007,6 +1007,66 @@ bool RISCVInstructionSelector::selectExtractSubvector(
   return true;
 }
 
+bool RISCVInstructionSelector::selectInsertSubVector(
+    MachineInstr &MI, MachineIRBuilder &MIB) const {
+  assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register VecReg = MI.getOperand(1).getReg();
+  Register SubVecReg = MI.getOperand(2).getReg();
+
+  LLT VecTy = MRI->getType(VecReg);
+  LLT SubVecTy = MRI->getType(SubVecReg);
+
+  MVT VecMVT = getMVTForLLT(VecTy);
+  MVT SubVecMVT = getMVTForLLT(SubVecTy);
+
+  unsigned Idx = static_cast<unsigned>(MI.getOperand(3).getImm());
+
+  unsigned SubRegIdx;
+  std::tie(SubRegIdx, Idx) =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          VecMVT, SubVecMVT, Idx, &TRI);
+
+  // If the Idx hasn't been completely eliminated then this is a subvector
+  // insert which doesn't naturally align to a vector register. These must
+  // be handled using instructions to manipulate the vector registers.
+  if (Idx != 0)
+    return false;
+
+  RISCVVType::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecMVT);
+  [[maybe_unused]] bool IsSubVecPartReg =
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
+      SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
+
+  // Constrain dst
+  unsigned DstRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VecMVT);
+  const TargetRegisterClass *DstRC = TRI.getRegClass(DstRegClassID);
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+    return false;
+
+  // If we haven't set a SubRegIdx, then we must be going between
+  // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
+  if (SubRegIdx == RISCV::NoSubRegister) {
+    assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
+               DstRegClassID &&
+           "Unexpected subvector insert");
+    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Use INSERT_SUBREG to insert the subvector into the vector at the
+  // appropriate subregister index.
+  auto Ins =
+      MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
+          .addImm(SubRegIdx);
+
+  MI.eraseFromParent();
+  return constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+}
+
 bool RISCVInstructionSelector::select(MachineInstr &MI) {
   MachineIRBuilder MIB(MI);
 
@@ -1281,6 +1341,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
     return selectIntrinsicWithSideEffects(MI, MIB);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
     return selectExtractSubvector(MI, MIB);
+  case TargetOpcode::G_INSERT_SUBVECTOR:
+    return selectInsertSubVector(MI, MIB);
   default:
     return false;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a6eb225e24609..c79f9ce338c80 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25580,7 +25580,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
 
       return false;
     }
-    if (II->getIntrinsicID() == Intrinsic::vector_extract)
+    if (II->getIntrinsicID() == Intrinsic::vector_extract ||
+        II->getIntrinsicID() == Intrinsic::vector_insert)
       return false;
   }
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
new file mode 100644
index 0000000000000..9c83c6fe2ba27
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
@@ -0,0 +1,411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v11, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_0(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv4r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv4r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv2r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v13, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v15, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v16, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_6(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_0(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 1)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 2)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    mul a1, a0, a1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v9, v10, a1
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_0(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x h...
[truncated]

@llvmbot (Member) commented Dec 8, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Jianjian Guan (jacquesguan)

Changes

Patch is 22.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171092.diff

(Same patch summary and truncated diff as in the previous comment.)

@github-actions bot commented Dec 8, 2025

🐧 Linux x64 Test Results

  • 187214 tests passed
  • 4941 tests skipped

✅ The build succeeded and all tests passed.

@topperc (Collaborator) commented Dec 8, 2025

Looks like you have a failing test

    assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
               DstRegClassID &&
           "Unexpected subvector insert");
    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
Contributor

Selector should not be using MachineInstrBuilder, and use direct BuildMI calls
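
For reference, a minimal sketch of the two builder styles under discussion, assuming the selectInsertSubVector() context from the patch (MI, MIB, TII, DstReg, VecReg, SubVecReg, and SubRegIdx as defined above); the BuildMI form is an illustrative rewrite, not the author's code:

    // Style used in the patch: build through GlobalISel's MachineIRBuilder,
    // which inserts at MIB's current insertion point.
    auto InsViaMIB =
        MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
            .addImm(SubRegIdx);

    // Style the reviewer suggests: call BuildMI (llvm/CodeGen/MachineInstrBuilder.h)
    // directly, inserting before MI in its parent block and reusing MI's debug
    // location. Both forms return a MachineInstrBuilder, so the addReg/addImm
    // chaining and the later constrainSelectedInstRegOperands() call are unchanged.
    auto InsViaBuildMI =
        BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
                TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
            .addReg(VecReg)
            .addReg(SubVecReg)
            .addImm(SubRegIdx);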

Contributor Author

It seems we only use MachineInstrBuilder in RISCVInstructionSelector.cpp; I think we could refactor them all in a new PR.

Comment on lines 2 to 3
; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
Contributor

Suggested change (replace the two RUN lines above with):

; RUN: llc -global-isel -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v,+zvfbfmin < %s | FileCheck %s
; RUN: llc -global-isel -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v,+zvfbfmin < %s | FileCheck %s

Contributor Author

Added =.

jacquesguan force-pushed the riscv-gisel-select-insert-subvector branch from 081176d to 7969b4e on December 10, 2025, 06:28
jacquesguan requested a review from topperc on December 10, 2025, 08:32
@jacquesguan (Contributor Author), replying to "Looks like you have a failing test":

OK now after rebasing with #171091.
