-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Improve cleanup phase of RISCV Insert VSETVLI pass #67144
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-risc-v ChangesMake the pass better at eliminating redundant VSETVLIs in certain scenarios, as demonstrated in the updated tests. Patch is 54.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67144.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5584fa8d503dbe4..138c28e4effabf6 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1426,43 +1426,6 @@ static bool isNonZeroAVL(const MachineOperand &MO) {
return 0 != MO.getImm();
}
-// Return true if we can mutate PrevMI to match MI without changing any the
-// fields which would be observed.
-static bool canMutatePriorConfig(const MachineInstr &PrevMI,
- const MachineInstr &MI,
- const DemandedFields &Used) {
- // If the VL values aren't equal, return false if either a) the former is
- // demanded, or b) we can't rewrite the former to be the later for
- // implementation reasons.
- if (!isVLPreservingConfig(MI)) {
- if (Used.VLAny)
- return false;
-
- // We don't bother to handle the equally zero case here as it's largely
- // uninteresting.
- if (Used.VLZeroness) {
- if (isVLPreservingConfig(PrevMI))
- return false;
- if (!isNonZeroAVL(MI.getOperand(1)) ||
- !isNonZeroAVL(PrevMI.getOperand(1)))
- return false;
- }
-
- // TODO: Track whether the register is defined between
- // PrevMI and MI.
- if (MI.getOperand(1).isReg() &&
- RISCV::X0 != MI.getOperand(1).getReg())
- return false;
- }
-
- if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
- return false;
-
- auto PriorVType = PrevMI.getOperand(2).getImm();
- auto VType = MI.getOperand(2).getImm();
- return areCompatibleVTYPEs(PriorVType, VType, Used);
-}
-
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
@@ -1475,36 +1438,74 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
if (!isVectorConfigInstr(MI)) {
doUnion(Used, getDemanded(MI, MRI));
+ // We can't handle the case when the source AVL
+ // register of *NextMI is defined after MI
+ if (NextMI && NextMI->getOperand(1).isReg() && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).getReg() == NextMI->getOperand(1).getReg())
+ Used.demandVL();
continue;
}
Register VRegDef = MI.getOperand(0).getReg();
if (VRegDef != RISCV::X0 &&
- !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
- Used.demandVL();
+ !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) {
+ for (MachineInstr &UserMI : MRI->use_nodbg_instructions(VRegDef)) {
+ // ignore uses by *NextMI since we will deal with them explicitly
+ if (!NextMI || !UserMI.isIdenticalTo(*NextMI)) {
+ Used.demandVL();
+ break;
+ }
+ }
+ }
if (NextMI) {
- if (!Used.usedVL() && !Used.usedVTYPE()) {
- ToDelete.push_back(&MI);
- // Leave NextMI unchanged
- continue;
- } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
- if (!isVLPreservingConfig(*NextMI)) {
- MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
- MI.getOperand(0).setIsDead(false);
- if (NextMI->getOperand(1).isImm())
- MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
- else
- MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
- MI.setDesc(NextMI->getDesc());
+ auto NextVRegDef = NextMI->getOperand(0).getReg();
+ auto &AVLOp = MI.getOperand(1), &NextAVLOp = NextMI->getOperand(1);
+ auto &VLTypeOp = MI.getOperand(2), &NextVLTypeOp = NextMI->getOperand(2);
+ if (areCompatibleVTYPEs(VLTypeOp.getImm(), NextVLTypeOp.getImm(), Used)) {
+ bool VLUsed =
+ Used.VLAny || (Used.VLZeroness &&
+ (!isNonZeroAVL(AVLOp) || !isNonZeroAVL(NextAVLOp)));
+ if (isVLPreservingConfig(*NextMI) || !VLUsed) {
+ // set VLType of MI to that of *NextMI
+ VLTypeOp.setImm(NextVLTypeOp.getImm());
+ // remove *NextMI and potentially replace its uses with MI
+ ToDelete.push_back(NextMI);
+ if (NextVRegDef != RISCV::X0 && !MRI->use_nodbg_empty(NextVRegDef)) {
+ if (VRegDef != RISCV::X0)
+ MRI->replaceRegWith(NextVRegDef, VRegDef);
+ else {
+ MI.getOperand(0).ChangeToRegister(
+ MRI->createVirtualRegister(&RISCV::GPRRegClass), true);
+ MRI->replaceRegWith(NextVRegDef, MI.getOperand(0).getReg());
+ }
+ }
+ // is the AVL op of *NextMI a non-zero reg storing the result of MI?
+ bool KeepAVL = NextAVLOp.isReg() && NextAVLOp.getReg() == VRegDef &&
+ VRegDef != RISCV::X0;
+ // set AVL of MI to that of *NextMI whenever it makes sense to do so
+ if (!isVLPreservingConfig(*NextMI) && !KeepAVL) {
+ if (NextAVLOp.isImm())
+ AVLOp.ChangeToImmediate(NextAVLOp.getImm());
+ else {
+ AVLOp.ChangeToRegister(NextAVLOp.getReg(), false);
+ // ensure that *MI did not inadvertently become VL-preserving
+ if (VRegDef == RISCV::X0 && AVLOp.getReg() == RISCV::X0)
+ MI.getOperand(0).ChangeToRegister(
+ MRI->createVirtualRegister(&RISCV::GPRRegClass), true);
+ }
+ MI.setDesc(NextMI->getDesc());
+ }
}
- MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
- ToDelete.push_back(NextMI);
- // fallthrough
}
}
NextMI = &MI;
Used = getDemanded(MI, MRI);
+ // ignore such uses since we can handle them explicitly
+ if (VRegDef == RISCV::X0) {
+ Used.VLAny = false;
+ Used.VLZeroness = false;
+ }
}
for (auto *MI : ToDelete)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 49724cbc4418252..1bb197c84799b83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2530,9 +2530,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
@@ -2557,9 +2556,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB35_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2577,9 +2575,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
@@ -2590,9 +2587,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
@@ -2613,9 +2609,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
@@ -2626,9 +2621,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -2680,9 +2674,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
@@ -2707,9 +2700,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB36_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2727,9 +2719,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
@@ -2740,9 +2731,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
@@ -2763,9 +2753,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB36_11
@@ -2776,9 +2765,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -2834,9 +2822,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
@@ -2862,9 +2849,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB37_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2883,9 +2869,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
@@ -2897,9 +2882,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
@@ -2922,9 +2906,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB37_11
@@ -2936,9 +2919,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -2992,9 +2974,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
@@ -3019,9 +3000,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB38_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3039,9 +3019,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
@@ -3052,9 +3031,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
@@ -3075,9 +3053,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB38_11
@@ -3088,9 +3065,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -3143,9 +3119,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
@@ -3170,9 +3145,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB39_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3190,9 +3164,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV...
[truncated]
|
@@ -1475,36 +1438,74 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) { | |||
|
|||
if (!isVectorConfigInstr(MI)) { | |||
doUnion(Used, getDemanded(MI, MRI)); | |||
// We can't handle the case when the source AVL | |||
// register of *NextMI is defined after MI | |||
if (NextMI && NextMI->getOperand(1).isReg() && MI.getOperand(0).isReg() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MI here can be any instruction right? What if it has more than 1 def?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or it might not even be a def. It could be a store instruction that uses the same register?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To address this, I've tried to make the check as general as possible: 22d8abb.
✅ With the latest revision this PR passed the C/C++ code formatter. |
15e2a7f
to
22d8abb
Compare
@@ -329,9 +329,9 @@ entry: | |||
define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind { | |||
; CHECK-LABEL: test17: | |||
; CHECK: # %bb.0: # %entry | |||
; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, ma | |||
; CHECK-NEXT: vfmv.f.s fa5, v8 | |||
; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be removed too? It just sets vl
, vtype
will be changed by vsetvli
below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was due to a use by an instruction which was marked for deletion. Unfortunately, to account for such cases I needed to make ToDelete
a SmallPtrSet
so that I could quickly discard instructions added to this set: e8f2fbe
Gentle ping |
Make the pass better at eliminating redundant VSETVLIs in certain scenarios, as demonstrated in the updated tests.
702b488
to
1c1b326
Compare
@@ -834,8 +834,7 @@ define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y | |||
; CHECK-LABEL: pre_lmul: | |||
; CHECK: # %bb.0: # %entry | |||
; CHECK-NEXT: andi a0, a0, 1 | |||
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma | |||
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma | |||
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma | |||
; CHECK-NEXT: vadd.vv v8, v8, v9 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems odd.
According to the LLVM IR, is should be processing VLMAX number of element of SEW = 64, LMUL = 1.
Suppose RVV VLEN = 128bit:
SEW = 64, LMUL = 1, the vsetvlmax output should be 2 elements.
SEW = 32, LMUL = 1, the vsetvlmax output should be 4 elements.
According to LLVM IR:
%vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
%a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl)
%vl output should be 2 elements, then later "vadd" should be processing 2 elements.
Here the new assembly after this patch is:
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
The "vadd.vv" is processing 4 elements.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the purpose of vsetvli zero, a1, e32, m1, ta, ma
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch! I think it is a wrong optimization here.
What is the purpose of
vsetvli zero, a1, e32, m1, ta, ma
?
It sets vl
to a1
(result of %vl = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0)
) and changes vtype
to e32, m1
, which matches semantics of LLVM IR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the optimization therefore be the single instruction vsetvli a1, zero, e64, m1, ta, ma
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so.
We need vl
to be VLMAX
of e64, m1
and vtype
to be e32, m1
(so the final vl
will be half of the VLMAX
of e32, m1
). Optimization may not be feasible here.
Make the pass better at eliminating redundant VSETVLIs in certain scenarios, as demonstrated in the updated tests.