[RISCV][GISel] Support select G_INSERT_SUBVECTOR #171092
base: main
Conversation
@llvm/pr-subscribers-llvm-globalisel

Author: Jianjian Guan (jacquesguan)

Changes

Patch is 22.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171092.diff

3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 4f2e633c1c524..c252875f05dca 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -100,7 +100,7 @@ class RISCVInstructionSelector : public InstructionSelector {
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
bool selectExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
-
+ bool selectInsertSubVector(MachineInstr &I, MachineIRBuilder &MIB) const;
ComplexRendererFns selectShiftMask(MachineOperand &Root,
unsigned ShiftWidth) const;
ComplexRendererFns selectShiftMaskXLen(MachineOperand &Root) const {
@@ -1007,6 +1007,66 @@ bool RISCVInstructionSelector::selectExtractSubvector(
return true;
}
+bool RISCVInstructionSelector::selectInsertSubVector(
+ MachineInstr &MI, MachineIRBuilder &MIB) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register VecReg = MI.getOperand(1).getReg();
+ Register SubVecReg = MI.getOperand(2).getReg();
+
+ LLT VecTy = MRI->getType(VecReg);
+ LLT SubVecTy = MRI->getType(SubVecReg);
+
+ MVT VecMVT = getMVTForLLT(VecTy);
+ MVT SubVecMVT = getMVTForLLT(SubVecTy);
+
+ unsigned Idx = static_cast<unsigned>(MI.getOperand(3).getImm());
+
+ unsigned SubRegIdx;
+ std::tie(SubRegIdx, Idx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ VecMVT, SubVecMVT, Idx, &TRI);
+
+ // If the Idx hasn't been completely eliminated then this is a subvector
+ // insert which doesn't naturally align to a vector register. These must
+ // be handled using instructions to manipulate the vector registers.
+ if (Idx != 0)
+ return false;
+
+ RISCVVType::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecMVT);
+ [[maybe_unused]] bool IsSubVecPartReg =
+ SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
+ SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
+ SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
+
+ // Constrain dst
+ unsigned DstRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VecMVT);
+ const TargetRegisterClass *DstRC = TRI.getRegClass(DstRegClassID);
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+ return false;
+
+ // If we haven't set a SubRegIdx, then we must be going between
+ // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
+ if (SubRegIdx == RISCV::NoSubRegister) {
+ assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
+ DstRegClassID &&
+ "Unexpected subvector insert");
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Use INSERT_SUBREG to insert the subvector into the vector at the
+ // appropriate subregister index.
+ auto Ins =
+ MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
+ .addImm(SubRegIdx);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+}
+
bool RISCVInstructionSelector::select(MachineInstr &MI) {
MachineIRBuilder MIB(MI);
@@ -1281,6 +1341,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
return selectIntrinsicWithSideEffects(MI, MIB);
case TargetOpcode::G_EXTRACT_SUBVECTOR:
return selectExtractSubvector(MI, MIB);
+ case TargetOpcode::G_INSERT_SUBVECTOR:
+ return selectInsertSubVector(MI, MIB);
default:
return false;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a6eb225e24609..c79f9ce338c80 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25580,7 +25580,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
return false;
}
- if (II->getIntrinsicID() == Intrinsic::vector_extract)
+ if (II->getIntrinsicID() == Intrinsic::vector_extract ||
+ II->getIntrinsicID() == Intrinsic::vector_insert)
return false;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
new file mode 100644
index 0000000000000..9c83c6fe2ba27
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
@@ -0,0 +1,411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v10, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v11, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_0(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v10, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v14, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v11, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v13, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v14, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v15, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_6(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v11, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 6)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_0(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 1)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 2)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v10, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_0(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x h...
[truncated]
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed.
Looks like you have a failing test |
    assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
               DstRegClassID &&
           "Unexpected subvector insert");
    MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
Selector should not be using MachineInstrBuilder, and use direct BuildMI calls
Seems that we only use MachineInstrBuilder in RISCVInstructionSelector.cpp; I think we could refactor them all in a new PR.
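For reference, a minimal sketch of what the reviewer's BuildMI suggestion could look like for the COPY case quoted above. This is not part of the patch; the MI, TII, DstReg, and SubVecReg names are assumed to match the selector code in the diff, and the exact insertion point is an assumption.

  // Hypothetical replacement for MIB.buildInstr(TargetOpcode::COPY, ...):
  // build the COPY directly with BuildMI, inserting before MI and reusing
  // its debug location, then erase the generic instruction as before.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII.get(TargetOpcode::COPY),
          DstReg)
      .addReg(SubVecReg);
  MI.eraseFromParent();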
; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
Suggested change:
-; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v,+zvfbfmin < %s | FileCheck %s
Added =.
Force-pushed from 081176d to 7969b4e.
OK now after rebase with #171091.