Skip to content

Commit

Permalink
[RISCV] Support postRA vsetvl insertion pass (#70549)
Browse files Browse the repository at this point in the history
This patch try to get rid of vsetvl implict vl/vtype def-use chain and
improve the register allocation quality by moving the vsetvl insertion
pass after RVV register allocation

It will gain the benefit for the following optimization from

1. unblock scheduler's constraints by removing vl/vtype def-use chain
2. Support RVV re-materialization
3. Support partial spill

This patch add a new option `-riscv-vsetvl-after-rvv-regalloc=<1|0>` to
control this feature and default set as disable.
  • Loading branch information
BeMg committed May 21, 2024
1 parent 9c78481 commit 675e7bd
Show file tree
Hide file tree
Showing 242 changed files with 9,721 additions and 8,870 deletions.
19 changes: 15 additions & 4 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ static cl::opt<bool> EnableMISchedLoadClustering(
cl::desc("Enable load clustering in the machine scheduler"),
cl::init(false));

static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
"riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
cl::desc("Insert vsetvls after vector register allocation"),
cl::init(true));

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
Expand Down Expand Up @@ -389,6 +394,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {

bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
if (EnableVSETVLIAfterRVVRegAlloc)
addPass(createRISCVInsertVSETVLIPass());
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
Expand All @@ -399,6 +406,8 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
if (EnableVSETVLIAfterRVVRegAlloc)
addPass(createRISCVInsertVSETVLIPass());
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
Expand Down Expand Up @@ -547,10 +556,12 @@ void RISCVPassConfig::addPreRegAlloc() {

// Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
// register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
if (TM->getOptLevel() == CodeGenOptLevel::None)
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
else
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
if (!EnableVSETVLIAfterRVVRegAlloc) {
if (TM->getOptLevel() == CodeGenOptLevel::None)
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
else
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
}
}

void RISCVPassConfig::addFastRegAlloc() {
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,12 @@
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
; CHECK-NEXT: Machine Block Frequency Analysis
Expand All @@ -142,6 +141,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: RISC-V Dead register definitions
; CHECK-NEXT: Virtual Register Map
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,36 +24,36 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
Expand All @@ -71,12 +71,12 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, 1048572
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
; CHECK-NEXT: lui a0, %hi(var_47)
; CHECK-NEXT: addi a0, a0, %lo(var_47)
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-NEXT: sw a0, 16(sp)
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: vid.v v8
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vid.v v16
; RV32-NEXT: li a2, -1
; RV32-NEXT: vmadd.vx v8, a2, v16
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmadd.vx v16, a2, v8
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vmsne.vi v0, v16, 0
; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: vmerge.vim v16, v16, -1, v0
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: sltu a3, a0, a2
Expand All @@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV64: # %bb.0:
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.x v24, a0
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmv.v.x v16, a0
; RV64-NEXT: vid.v v24
; RV64-NEXT: li a1, -1
; RV64-NEXT: vmadd.vx v16, a1, v24
; RV64-NEXT: vmadd.vx v24, a1, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vmerge.vvm v8, v8, v24, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
Expand Down
Loading

0 comments on commit 675e7bd

Please sign in to comment.