From 11a7e77c95ddcb51779d9e9d804222eb45a1da92 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 17 May 2022 11:29:39 -0700 Subject: [PATCH] [RISCV] Canonicalize AVL=setvli to AVL=Imm or AVL=VLMAX This patch adds a transform to the local prepass in InsertVSETVLI which canonicalizes an AVL of a register from another vsetvli into immediate or VLMAX when VTYPE is the same. In this patch, I chose to be conservative and avoid arbitrary vreg forwarding due to profitability concerns about possibly overlapping live ranges. This has the effect of eliminating vsetvli instructions in loops which are walking either VLMAX or a constant number of lanes per iteration. Differential Revision: https://reviews.llvm.org/D125812 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 23 +++++++++++++++++++ .../RISCV/rvv/vsetvli-insert-crossbb.ll | 14 +++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 0ef70dacce086..4062859177046 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1242,6 +1242,29 @@ void RISCVInsertVSETVLI::doLocalPrepass(MachineBasicBlock &MBB) { } } } + + // If AVL is defined by a vsetvli with the same vtype, we can + // replace the AVL operand with the AVL of the defining vsetvli. + // We avoid general register AVLs to avoid extending live ranges + // without being sure we can kill the original source reg entirely. + // TODO: We can ignore policy bits here, we only need VL to be the same. 
+ if (Require.hasAVLReg() && Require.getAVLReg().isVirtual()) { + if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { + if (isVectorConfigInstr(*DefMI)) { + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + if (DefInfo.hasSameVTYPE(Require) && + (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) { + MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); + if (DefInfo.hasAVLImm()) + VLOp.ChangeToImmediate(DefInfo.getAVLImm()); + else + VLOp.ChangeToRegister(DefInfo.getAVLReg(), /*IsDef*/ false); + CurInfo = computeInfoForInstr(MI, TSFlags, MRI); + continue; + } + } + } + } } CurInfo = computeInfoForInstr(MI, TSFlags, MRI); continue; diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index bdaa993435589..ba48e964c8814 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -591,21 +591,20 @@ define void @vlmax(i64 %N, double* %c, double* %a, double* %b) { ; CHECK-NEXT: blez a0, .LBB11_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: li t1, 0 +; CHECK-NEXT: li t0, 0 ; CHECK-NEXT: slli a7, a6, 3 ; CHECK-NEXT: .LBB11_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add t0, a2, a5 -; CHECK-NEXT: vsetvli zero, a6, e64, m1, ta, mu -; CHECK-NEXT: vle64.v v8, (t0) +; CHECK-NEXT: add a4, a2, a5 +; CHECK-NEXT: vle64.v v8, (a4) ; CHECK-NEXT: add a4, a3, a5 ; CHECK-NEXT: vle64.v v9, (a4) ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: add a4, a1, a5 ; CHECK-NEXT: vse64.v v8, (a4) -; CHECK-NEXT: add t1, t1, a6 +; CHECK-NEXT: add t0, t0, a6 ; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: blt t1, a0, .LBB11_2 +; CHECK-NEXT: blt t0, a0, .LBB11_2 ; CHECK-NEXT: .LBB11_3: # %for.end ; CHECK-NEXT: ret entry: @@ -645,7 +644,6 @@ define void @vector_init_vlmax(i64 %N, double* %c) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB12_2: # %for.body ; CHECK-NEXT: # 
=>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: add a1, a1, a4 @@ -719,7 +717,7 @@ define void @vector_init_vsetvli_fv(i64 %N, double* %c) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB14_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetvli zero, a3, e64, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a1, a1, a4