-
Notifications
You must be signed in to change notification settings - Fork 15.4k
backport: [RISCV] Sources of vmerge shouldn't overlap V0 (#170070) #170604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: release/21.x
Are you sure you want to change the base?
Conversation
According to the spec: > A vector register cannot be used to provide source operands with more > than one EEW for a single instruction. A mask register source is > considered to have EEW=1 for this constraint. There must be a mask `V0` in `vmerge` variants so the sources should use register classes without `V0`. This fixes llvm#169905. Co-authored-by: Luke Lau <luke@igalia.com>
There are two fixes: 1. Clear kill flags for `FalseReg` in foldVMergeToMask or we can't pass the MachineVerifier because of using a killed virtual register. 2. Restrict `lookThruCopies` to only look through COPYs with one non-debug use. This was found when backporting llvm#170070 to 21.x branch.
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v Author: Pengcheng Wang (wangpc-pp) ChangesThis PR contains two commits:
Patch is 137.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170604.diff 17 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index de9e55beb6a5e..63fb4ea091bb1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3034,21 +3034,21 @@ multiclass VPseudoVFWALU_WV_WF_RM {
multiclass VPseudoVMRG_VM_XM_IM {
foreach m = MxList in {
defvar mx = m.MX;
- def "_VVM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, m.vrclass, m>,
- SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
- forcePassthruRead=true>;
- def "_VXM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, GPR, m>,
- SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
- forcePassthruRead=true>;
- def "_VIM" # "_" # m.MX:
- VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, simm5, m>,
- SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
- forcePassthruRead=true>;
+ def "_VVM"#"_"#m.MX : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, m>,
+ SchedBinary<"WriteVIMergeV", "ReadVIMergeV", "ReadVIMergeV", mx,
+ forcePassthruRead = true>;
+ def "_VXM"#"_"#m.MX
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, GPR, m>,
+ SchedBinary<"WriteVIMergeX", "ReadVIMergeV", "ReadVIMergeX", mx,
+ forcePassthruRead = true>;
+ def "_VIM"#"_"#m.MX
+ : VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ GetVRegNoV0<m.vrclass>.R, simm5, m>,
+ SchedUnary<"WriteVIMergeI", "ReadVIMergeV", mx,
+ forcePassthruRead = true>;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index c1cc19b503deb..53a96ef3f1656 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -73,7 +73,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool isAllOnesMask(const MachineInstr *MaskDef) const;
std::optional<unsigned> getConstant(const MachineOperand &VL) const;
bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
- bool isKnownSameDefs(Register A, Register B) const;
+ Register lookThruCopies(Register Reg, bool OneUseOnly = false) const;
};
} // namespace
@@ -387,23 +387,21 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
return true;
}
-bool RISCVVectorPeephole::isKnownSameDefs(Register A, Register B) const {
- if (A.isPhysical() || B.isPhysical())
- return false;
-
- auto LookThruVirtRegCopies = [this](Register Reg) {
- while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
- if (!Def->isFullCopy())
- break;
- Register Src = Def->getOperand(1).getReg();
- if (!Src.isVirtual())
- break;
- Reg = Src;
- }
- return Reg;
- };
-
- return LookThruVirtRegCopies(A) == LookThruVirtRegCopies(B);
+// If \p Reg is defined by one or more COPYs of virtual registers, traverses
+// the chain and returns the root non-COPY source.
+Register RISCVVectorPeephole::lookThruCopies(Register Reg,
+ bool OneUseOnly) const {
+ while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
+ if (!Def->isFullCopy())
+ break;
+ Register Src = Def->getOperand(1).getReg();
+ if (!Src.isVirtual())
+ break;
+ if (OneUseOnly && !MRI->hasOneNonDBGUse(Reg))
+ break;
+ Reg = Src;
+ }
+ return Reg;
}
/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
@@ -428,10 +426,11 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
if (!TrueMaskedInfo || !hasSameEEW(MI, *True))
return false;
- const MachineOperand &TrueMask =
- True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs());
- const MachineOperand &MIMask = MI.getOperand(4);
- if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
+ Register TrueMaskReg = lookThruCopies(
+ True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs())
+ .getReg());
+ Register MIMaskReg = lookThruCopies(MI.getOperand(4).getReg());
+ if (!TrueMaskReg.isVirtual() || TrueMaskReg != MIMaskReg)
return false;
// Masked off lanes past TrueVL will come from False, and converting to vmv
@@ -718,9 +717,10 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM)
return false;
- Register PassthruReg = MI.getOperand(1).getReg();
- Register FalseReg = MI.getOperand(2).getReg();
- Register TrueReg = MI.getOperand(3).getReg();
+ Register PassthruReg = lookThruCopies(MI.getOperand(1).getReg());
+ Register FalseReg = lookThruCopies(MI.getOperand(2).getReg());
+ Register TrueReg =
+ lookThruCopies(MI.getOperand(3).getReg(), /*OneUseOnly=*/true);
if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg))
return false;
MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg);
@@ -741,14 +741,15 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
// We require that either passthru and false are the same, or that passthru
// is undefined.
- if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg))
+ if (PassthruReg && !(PassthruReg.isVirtual() && PassthruReg == FalseReg))
return false;
// If True has a passthru operand then it needs to be the same as vmerge's
// False, since False will be used for the result's passthru operand.
- Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg();
+ Register TruePassthru =
+ lookThruCopies(True.getOperand(True.getNumExplicitDefs()).getReg());
if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru &&
- !isKnownSameDefs(TruePassthru, FalseReg))
+ !(TruePassthru.isVirtual() && TruePassthru == FalseReg))
return false;
// Make sure it doesn't raise any observable fp exceptions, since changing the
@@ -818,6 +819,8 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
MRI->constrainRegClass(
MO.getReg(), True.getRegClassConstraint(MO.getOperandNo(), TII, TRI));
}
+ // We should clear the IsKill flag since we have a new use now.
+ MRI->clearKillFlags(FalseReg);
MI.eraseFromParent();
return true;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
index f8061462c6220..ada76a43639d7 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir
@@ -11,7 +11,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv1i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -19,7 +19,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv1i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -40,7 +40,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv4i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -48,7 +48,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv4i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -69,7 +69,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv16i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -77,7 +77,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv16i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */
; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -98,7 +98,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv64i8
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -106,7 +106,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv64i8
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]]
@@ -127,7 +127,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv2i16
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -135,7 +135,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv2i16
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]]
@@ -156,7 +156,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv8i16
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -164,7 +164,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv8i16
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */
; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]]
@@ -185,7 +185,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv32i16
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]]
@@ -193,7 +193,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv32i16
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]]
@@ -214,7 +214,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv2i32
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -222,7 +222,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv2i32
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -243,7 +243,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv8i32
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
@@ -251,7 +251,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv8i32
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */
; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
@@ -272,7 +272,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv1i64
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -280,7 +280,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv1i64
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]]
@@ -301,7 +301,7 @@ body: |
bb.0.entry:
; RV32I-LABEL: name: select_nxv4i64
; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
@@ -309,7 +309,7 @@ body: |
;
; RV64I-LABEL: name: select_nxv4i64
; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF
- ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF
; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */
; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]]
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 0d8aff306252e..f614829b2f13a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -311,14 +311,14 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v6, a0
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a0
; CHECK-NEXT: vslidedown.vx v5, v6, a0
-; CHECK-NEXT: vslidedown.vx v4, v7, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -425,11 +425,12 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v5, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v6, a1
; CHECK-NEXT: vslidedown.vx v5, v7, a1
-; CHECK-NEXT: vslidedown.vx v4, v6, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
index 31e79e58f44c5..aba75ffe29d33 100644
--- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
@@ ...
[truncated]
|
This PR contains two commits:
[RISCV] Sources of vmerge shouldn't overlap V0 ([RISCV] Sources of vmerge shouldn't overlap V0 #170070)
According to the spec:
There must be a mask
V0invmergevariants so the sources shoulduse register classes without
V0.This fixes [RISCV] Wrong register allocation for vmerge.vvm #169905.
Co-authored-by: Luke Lau luke@igalia.com
[RISCV] Fix corner cases after [RISCV] Sources of vmerge shouldn't overlap V0 #170070 ([RISCV] Fix corner cases after #170070 #170438)
There are two fixes:
FalseRegin foldVMergeToMask or we can'tpass the MachineVerifier because of using a killed virtual register.
lookThruCopiesto only look through COPYs withone non-debug use.
This was found when backporting [RISCV] Sources of vmerge shouldn't overlap V0 #170070 to 21.x branch.