Skip to content

Commit 47b756a

Browse files
authored
[RISCV] Only reduce VLs of instructions with demanded VLs (#168693)
In RISCVVLOptimizer we first compute all the demanded VLs, then we walk backwards through the function and try to reduce any VLs. We don't actually need to walk backwards anymore since after #124530 the order in which we modify the instructions doesn't matter. This patch changes it to just iterate over the instructions with a demanded VL computed, which means we don't iterate over scalar instructions etc. This also fixes #168665, where we triggered an assert on instructions with a dead $vxsat implicit-def: dead %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat Because $vxsat is a reserved register, DeadMachineInstructionElim won't remove it and the instruction makes it to RISCVVLOptimizer. And because the def of %x is dead, we don't reach this instruction in the dataflow analysis. This instruction returns true for isCandidate, so we would try to lookup its demanded VL which doesn't exist and assert. But with this patch we don't try to reduce instructions that aren't in DemandedVLs, which fixes the crash.
1 parent 3f151a3 commit 47b756a

File tree

3 files changed

+61
-31
lines changed

3 files changed

+61
-31
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
8585
DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
8686
/// Returns true if the users of \p MI have compatible EEWs and SEWs.
8787
bool checkUsers(const MachineInstr &MI) const;
88-
bool tryReduceVL(MachineInstr &MI) const;
88+
bool tryReduceVL(MachineInstr &MI, MachineOperand VL) const;
8989
bool isCandidate(const MachineInstr &MI) const;
9090
void transfer(const MachineInstr &MI);
9191

@@ -1607,7 +1607,8 @@ bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
16071607
return true;
16081608
}
16091609

1610-
bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
1610+
bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI,
1611+
MachineOperand CommonVL) const {
16111612
LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
16121613

16131614
unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
@@ -1620,50 +1621,47 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
16201621
return false;
16211622
}
16221623

1623-
auto *CommonVL = &DemandedVLs.at(&MI).VL;
1624-
1625-
assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
1624+
assert((CommonVL.isImm() || CommonVL.getReg().isVirtual()) &&
16261625
"Expected VL to be an Imm or virtual Reg");
16271626

16281627
// If the VL is defined by a vleff that doesn't dominate MI, try using the
16291628
// vleff's AVL. It will be greater than or equal to the output VL.
1630-
if (CommonVL->isReg()) {
1631-
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
1629+
if (CommonVL.isReg()) {
1630+
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
16321631
if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
16331632
!MDT->dominates(VLMI, &MI))
1634-
CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
1633+
CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
16351634
}
16361635

1637-
if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
1636+
if (!RISCV::isVLKnownLE(CommonVL, VLOp)) {
16381637
LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
16391638
return false;
16401639
}
16411640

1642-
if (CommonVL->isIdenticalTo(VLOp)) {
1641+
if (CommonVL.isIdenticalTo(VLOp)) {
16431642
LLVM_DEBUG(
16441643
dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
16451644
return false;
16461645
}
16471646

1648-
if (CommonVL->isImm()) {
1647+
if (CommonVL.isImm()) {
16491648
LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1650-
<< CommonVL->getImm() << " for " << MI << "\n");
1651-
VLOp.ChangeToImmediate(CommonVL->getImm());
1649+
<< CommonVL.getImm() << " for " << MI << "\n");
1650+
VLOp.ChangeToImmediate(CommonVL.getImm());
16521651
return true;
16531652
}
1654-
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
1653+
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
16551654
if (!MDT->dominates(VLMI, &MI)) {
16561655
LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
16571656
return false;
16581657
}
1659-
LLVM_DEBUG(
1660-
dbgs() << " Reduce VL from " << VLOp << " to "
1661-
<< printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
1662-
<< " for " << MI << "\n");
1658+
LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
1659+
<< printReg(CommonVL.getReg(), MRI->getTargetRegisterInfo())
1660+
<< " for " << MI << "\n");
16631661

16641662
// All our checks passed. We can reduce VL.
1665-
VLOp.ChangeToRegister(CommonVL->getReg(), false);
1666-
MRI->constrainRegClass(CommonVL->getReg(), &RISCV::GPRNoX0RegClass);
1663+
VLOp.ChangeToRegister(CommonVL.getReg(), false);
1664+
MRI->constrainRegClass(CommonVL.getReg(), &RISCV::GPRNoX0RegClass);
16671665
return true;
16681666
}
16691667

@@ -1718,18 +1716,13 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
17181716
// Then go through and see if we can reduce the VL of any instructions to
17191717
// only what's demanded.
17201718
bool MadeChange = false;
1721-
for (MachineBasicBlock &MBB : MF) {
1722-
// Avoid unreachable blocks as they have degenerate dominance
1723-
if (!MDT->isReachableFromEntry(&MBB))
1719+
for (auto &[MI, VL] : DemandedVLs) {
1720+
assert(MDT->isReachableFromEntry(MI->getParent()));
1721+
if (!isCandidate(*MI))
17241722
continue;
1725-
1726-
for (auto &MI : reverse(MBB)) {
1727-
if (!isCandidate(MI))
1728-
continue;
1729-
if (!tryReduceVL(MI))
1730-
continue;
1731-
MadeChange = true;
1732-
}
1723+
if (!tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
1724+
continue;
1725+
MadeChange = true;
17331726
}
17341727

17351728
DemandedVLs.clear();

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,3 +325,28 @@ bar:
325325
%c = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen 2)
326326
ret <vscale x 4 x i32> %c
327327
}
328+
329+
; The vsmul.vx ends up dead, but doesn't get deleted before RISCVVLOptimizer. Make
330+
; sure we don't crash when we handle it.
331+
; TODO: DeadMachineInstructionElim should remove the dead vsmul.vx.
332+
define <vscale x 2 x i64> @dead_vsmul() {
333+
; CHECK-LABEL: dead_vsmul:
334+
; CHECK: # %bb.0: # %entry
335+
; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
336+
; CHECK-NEXT: vmv.v.i v10, 0
337+
; CHECK-NEXT: csrwi vxrm, 0
338+
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
339+
; CHECK-NEXT: vmv.v.i v8, 0
340+
; CHECK-NEXT: vsetivli zero, 0, e64, m2, tu, ma
341+
; CHECK-NEXT: vmv.v.v v8, v8
342+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
343+
; CHECK-NEXT: vsmul.vx v10, v10, zero
344+
; CHECK-NEXT: ret
345+
entry:
346+
%0 = call <vscale x 2 x i16> @llvm.riscv.vsmul(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i16 0, iXLen 0, iXLen 0)
347+
%1 = call <vscale x 2 x i32> @llvm.riscv.vwmacc(<vscale x 2 x i32> zeroinitializer, i16 0, <vscale x 2 x i16> %0, iXLen 0, iXLen 0)
348+
%2 = call <vscale x 2 x i64> @llvm.riscv.vwmul(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> %1, <vscale x 2 x i32> zeroinitializer, iXLen 0)
349+
%3 = call <vscale x 2 x i64> @llvm.riscv.vmerge(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %2, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), iXLen 0)
350+
351+
ret <vscale x 2 x i64> %3
352+
}

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,18 @@ body: |
393393
$v8 = COPY %y
394394
...
395395
---
396+
name: vxsat_instr_dead
397+
body: |
398+
bb.0:
399+
; CHECK-LABEL: name: vxsat_instr_dead
400+
; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
401+
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
402+
; CHECK-NEXT: $v8 = COPY %y
403+
%x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
404+
%y:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
405+
$v8 = COPY %y
406+
...
407+
---
396408
name: copy
397409
body: |
398410
bb.0:

0 commit comments

Comments
 (0)