Skip to content

Commit

Permalink
Reapply "[RISCV] Separate doLocalPostpass into new pass and move to p…
Browse files Browse the repository at this point in the history
…ost vector regalloc (#88295)"

The original commit was calling shrinkToUses on an interval for a virtual
register whose def was erased. This fixes it by calling shrinkToUses first
and removing the interval if we erase the old VL def.
  • Loading branch information
lukel97 committed Apr 24, 2024
1 parent 80f510b commit af82d01
Show file tree
Hide file tree
Showing 36 changed files with 1,283 additions and 1,358 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);

FunctionPass *createRISCVCoalesceVSETVLIPass();
void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);

FunctionPass *createRISCVPostRAExpandPseudoPass();
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
Expand Down
131 changes: 112 additions & 19 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,19 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -190,6 +193,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
if (UseMO.getReg() == RISCV::NoRegister)
return true;

if (UseMO.isUndef())
return true;
if (UseMO.getReg().isPhysical())
return false;

if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
Expand Down Expand Up @@ -780,18 +788,52 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doLocalPostpass(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
static char ID;
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;

RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();

AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<LiveStacks>();

MachineFunctionPass::getAnalysisUsage(AU);
}

StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
Expand Down Expand Up @@ -1515,12 +1557,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,

auto &AVL = MI.getOperand(1);
auto &PrevAVL = PrevMI.getOperand(1);
assert(MRI.isSSA());

// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
(!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
(!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
PrevAVL.getReg() != AVL.getReg()))
return false;
}

Expand All @@ -1530,7 +1572,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
Expand Down Expand Up @@ -1563,20 +1605,50 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {

if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
Register DefReg = NextMI->getOperand(0).getReg();

MI.getOperand(0).setReg(DefReg);
MI.getOperand(0).setIsDead(false);

// The def of DefReg moved to MI, so extend the LiveInterval up to
// it.
if (DefReg.isVirtual()) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
DefLI.addSegment(S);
DefVNI->def = MISlot;
// Mark DefLI as spillable if it was previously unspillable
DefLI.setWeight(0);

// DefReg may have had no uses, in which case we need to shrink
// the LiveInterval up to MI.
LIS->shrinkToUses(&DefLI);
}

Register OldVLReg;
if (MI.getOperand(1).isReg())
OldVLReg = MI.getOperand(1).getReg();
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
if (OldVLReg) {

// Clear NextMI's AVL early so we're not counting it as a use.
if (NextMI->getOperand(1).isReg())
NextMI->getOperand(1).setReg(RISCV::NoRegister);

if (OldVLReg && OldVLReg.isVirtual()) {
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg))
MRI->use_nodbg_empty(OldVLReg)) {
VLOpDef->eraseFromParent();
LIS->removeInterval(OldVLReg);
}
}
MI.setDesc(NextMI->getDesc());
}
Expand All @@ -1589,9 +1661,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
Used = getDemanded(MI, MRI, ST);
}

NumRemovedVSETVL += ToDelete.size();
for (auto *MI : ToDelete)
NumCoalescedVSETVL += ToDelete.size();
for (auto *MI : ToDelete) {
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}

return !ToDelete.empty();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
Expand Down Expand Up @@ -1666,15 +1742,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
for (MachineBasicBlock &MBB : MF)
doLocalPostpass(MBB);

// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
Expand All @@ -1688,3 +1755,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
if (!ST->hasVInstructions())
return false;
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();

bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= coalesceVSETVLIs(MBB);

return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
return new RISCVCoalesceVSETVLI();
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
initializeRISCVCoalesceVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
Expand Down Expand Up @@ -388,12 +389,14 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {

bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteFast();
}

bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
; CHECK-NEXT: Fixup Statepoint Caller Saved
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Virtual Register Map
; CHECK-NEXT: Live Register Matrix
; CHECK-NEXT: Greedy Register Allocator
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
Expand Down Expand Up @@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
Expand All @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
Expand All @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
Expand Down Expand Up @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; V128-NEXT: vid.v v8
; V128-NEXT: vsrl.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
; V128-NEXT: vmv.v.v v8, v10
; V128-NEXT: ret
Expand All @@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
; V512-NEXT: vid.v v8
; V512-NEXT: vsrl.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
Expand Down
Loading

0 comments on commit af82d01

Please sign in to comment.