Skip to content

Commit

Permalink
Revert "[RISCV] Separate doLocalPostpass into new pass and move to po…
Browse files Browse the repository at this point in the history
…st vector regalloc (#88295)"

Seems to cause an address sanitizer failure on one of the buildbots related
to live intervals.
  • Loading branch information
lukel97 committed Apr 24, 2024
1 parent fb302b1 commit fc13353
Show file tree
Hide file tree
Showing 36 changed files with 1,356 additions and 1,280 deletions.
3 changes: 0 additions & 3 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);

FunctionPass *createRISCVCoalesceVSETVLIPass();
void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);

FunctionPass *createRISCVPostRAExpandPseudoPass();
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
Expand Down
126 changes: 17 additions & 109 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,16 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -193,11 +190,6 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
if (UseMO.getReg() == RISCV::NoRegister)
return true;

if (UseMO.isUndef())
return true;
if (UseMO.getReg().isPhysical())
return false;

if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
Expand Down Expand Up @@ -788,52 +780,18 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doLocalPostpass(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
static char ID;
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;

RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();

AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<LiveStacks>();

MachineFunctionPass::getAnalysisUsage(AU);
}

StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
Expand Down Expand Up @@ -1557,12 +1515,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,

auto &AVL = MI.getOperand(1);
auto &PrevAVL = PrevMI.getOperand(1);
assert(MRI.isSSA());

// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
(!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
PrevAVL.getReg() != AVL.getReg()))
(!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
return false;
}

Expand All @@ -1572,7 +1530,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
return areCompatibleVTYPEs(PriorVType, VType, Used);
}

bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
Expand Down Expand Up @@ -1605,49 +1563,20 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {

if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
Register DefReg = NextMI->getOperand(0).getReg();

MI.getOperand(0).setReg(DefReg);
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
MI.getOperand(0).setIsDead(false);

// The def of DefReg moved to MI, so extend the LiveInterval up to
// it.
if (DefReg.isVirtual()) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
DefLI.addSegment(S);
DefVNI->def = MISlot;
// Mark DefLI as spillable if it was previously unspillable
DefLI.setWeight(0);

// DefReg may have had no uses, in which case we need to shrink
// the LiveInterval up to MI.
LIS->shrinkToUses(&DefLI);
}

Register OldVLReg;
if (MI.getOperand(1).isReg())
OldVLReg = MI.getOperand(1).getReg();
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);

// Clear NextMI's AVL early so we're not counting it as a use.
if (NextMI->getOperand(1).isReg())
NextMI->getOperand(1).setReg(RISCV::NoRegister);

if (OldVLReg) {
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg))
VLOpDef->eraseFromParent();

// NextMI no longer uses OldVLReg so shrink its LiveInterval.
if (OldVLReg.isVirtual())
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
}
MI.setDesc(NextMI->getDesc());
}
Expand All @@ -1660,13 +1589,9 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
Used = getDemanded(MI, MRI, ST);
}

NumCoalescedVSETVL += ToDelete.size();
for (auto *MI : ToDelete) {
LIS->RemoveMachineInstrFromMaps(*MI);
NumRemovedVSETVL += ToDelete.size();
for (auto *MI : ToDelete)
MI->eraseFromParent();
}

return !ToDelete.empty();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
Expand Down Expand Up @@ -1741,6 +1666,15 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
for (MachineBasicBlock &MBB : MF)
doLocalPostpass(MBB);

// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
Expand All @@ -1754,29 +1688,3 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
if (!ST->hasVInstructions())
return false;
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();

bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= coalesceVSETVLIs(MBB);

return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
return new RISCVCoalesceVSETVLI();
}
3 changes: 0 additions & 3 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
initializeRISCVCoalesceVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
Expand Down Expand Up @@ -389,14 +388,12 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {

bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteFast();
}

bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}

Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,6 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
; CHECK-NEXT: Fixup Statepoint Caller Saved
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Virtual Register Map
; CHECK-NEXT: Live Register Matrix
; CHECK-NEXT: Greedy Register Allocator
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
Expand Down Expand Up @@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
Expand All @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
Expand All @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
Expand Down Expand Up @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; V128-NEXT: vid.v v8
; V128-NEXT: vsrl.vi v8, v8, 1
; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
; V128-NEXT: vmv.v.v v8, v10
; V128-NEXT: ret
Expand All @@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
; V512-NEXT: vid.v v8
; V512-NEXT: vsrl.vi v8, v8, 1
; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
Expand Down
Loading

0 comments on commit fc13353

Please sign in to comment.