-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: refactor phi lowering from SILowerI1Copies (NFCI) #75349
AMDGPU: refactor phi lowering from SILowerI1Copies (NFCI) #75349
Conversation
Make an abstract class PhiLoweringHelper and expose it for use in the GlobalISel path. SILowerI1Copies implements PhiLoweringHelper as Vreg1LoweringHelper, which is equivalent to SILowerI1Copies. A notable change is that createLaneMaskReg now clones attributes from a register that has lane mask attributes, instead of creating a register with the lane mask register class. This is because lane masks have different (more) attributes in GlobalISel.
@llvm/pr-subscribers-backend-amdgpu Author: Petar Avramovic (petar-avramovic) ChangesMake abstract class PhiLoweringHelper and expose it for use in GlobalISel path. patch 2 from: #73337 Patch is 23.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75349.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 68c8f4024e7300..cfa0c21def791d 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,57 +21,26 @@
//
//===----------------------------------------------------------------------===//
+#include "SILowerI1Copies.h"
#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#define DEBUG_TYPE "si-i1-copies"
using namespace llvm;
-static unsigned createLaneMaskReg(MachineFunction &MF);
-static unsigned insertUndefLaneMask(MachineBasicBlock &MBB);
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ Register LaneMaskRegAttrs);
namespace {
-struct Incoming {
- Register Reg;
- MachineBasicBlock *Block;
- Register UpdatedReg;
-
- Incoming(Register Reg, MachineBasicBlock *Block, Register UpdatedReg)
- : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
-};
-
class SILowerI1Copies : public MachineFunctionPass {
public:
static char ID;
-private:
- bool IsWave32 = false;
- MachineFunction *MF = nullptr;
- MachineDominatorTree *DT = nullptr;
- MachinePostDominatorTree *PDT = nullptr;
- MachineRegisterInfo *MRI = nullptr;
- const GCNSubtarget *ST = nullptr;
- const SIInstrInfo *TII = nullptr;
-
- unsigned ExecReg;
- unsigned MovOp;
- unsigned AndOp;
- unsigned OrOp;
- unsigned XorOp;
- unsigned AndN2Op;
- unsigned OrN2Op;
-
- DenseSet<unsigned> ConstrainRegs;
-
-public:
SILowerI1Copies() : MachineFunctionPass(ID) {
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
}
@@ -86,29 +55,53 @@ class SILowerI1Copies : public MachineFunctionPass {
AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+};
+
+class Vreg1LoweringHelper : public PhiLoweringHelper {
+public:
+ Vreg1LoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT);
private:
- bool lowerCopiesFromI1();
- bool lowerPhis();
- bool lowerCopiesToI1();
- bool isConstantLaneMask(Register Reg, bool &Val) const;
+ DenseSet<Register> ConstrainRegs;
+
+public:
+ void markAsLaneMask(Register DstReg) const override;
+ void getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
+ void collectIncomingValuesFromPhi(
+ const MachineInstr *MI,
+ SmallVectorImpl<Incoming> &Incomings) const override;
+ void replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) override;
void buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DstReg, unsigned PrevReg, unsigned CurReg);
- MachineBasicBlock::iterator
- getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+ Register DstReg, Register PrevReg,
+ Register CurReg) override;
+ void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
+ bool lowerCopiesFromI1();
+ bool lowerCopiesToI1();
+ bool cleanConstrainRegs(bool Changed);
bool isVreg1(Register Reg) const {
return Reg.isVirtual() && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
}
-
- bool isLaneMaskReg(unsigned Reg) const {
- return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
- TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
- ST->getWavefrontSize();
- }
};
+Vreg1LoweringHelper::Vreg1LoweringHelper(MachineFunction *MF,
+ MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT)
+ : PhiLoweringHelper(MF, DT, PDT) {}
+
+bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) {
+ assert(Changed || ConstrainRegs.empty());
+ for (Register Reg : ConstrainRegs)
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
+ ConstrainRegs.clear();
+
+ return Changed;
+}
+
/// Helper class that determines the relationship between incoming values of a
/// phi in the control flow graph to determine where an incoming value can
/// simply be taken as a scalar lane mask as-is, and where it needs to be
@@ -311,6 +304,7 @@ class LoopFinder {
/// blocks, so that the SSA updater doesn't have to search all the way to the
/// function entry.
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
+ MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
ArrayRef<Incoming> Incomings = {}) {
assert(LoopLevel < CommonDominators.size());
@@ -319,13 +313,15 @@ class LoopFinder {
Dom = DT.findNearestCommonDominator(Dom, Incoming.Block);
if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
- SSAUpdater.AddAvailableValue(Dom, insertUndefLaneMask(*Dom));
+ SSAUpdater.AddAvailableValue(
+ Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs));
} else {
// The dominator is part of the loop or the given blocks, so add the
// undef value to unreachable predecessors instead.
for (MachineBasicBlock *Pred : Dom->predecessors()) {
if (!inLoopLevel(*Pred, LoopLevel, Incomings))
- SSAUpdater.AddAvailableValue(Pred, insertUndefLaneMask(*Pred));
+ SSAUpdater.AddAvailableValue(
+ Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs));
}
}
}
@@ -415,19 +411,19 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}
-static unsigned createLaneMaskReg(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
+Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
+ Register LaneMaskRegAttrs) {
+ return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
}
-static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
- MachineFunction &MF = *MBB.getParent();
+static Register insertUndefLaneMask(MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ Register LaneMaskRegAttrs) {
+ MachineFunction &MF = *MBB->getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned UndefReg = createLaneMaskReg(MF);
- BuildMI(MBB, MBB.getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
+ Register UndefReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+ BuildMI(*MBB, MBB->getFirstTerminator(), {}, TII->get(AMDGPU::IMPLICIT_DEF),
UndefReg);
return UndefReg;
}
@@ -444,47 +440,17 @@ static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
// Only need to run this in SelectionDAG path.
if (TheMF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected))
+ MachineFunctionProperties::Property::Selected))
return false;
- MF = &TheMF;
- MRI = &MF->getRegInfo();
- DT = &getAnalysis<MachineDominatorTree>();
- PDT = &getAnalysis<MachinePostDominatorTree>();
-
- ST = &MF->getSubtarget<GCNSubtarget>();
- TII = ST->getInstrInfo();
- IsWave32 = ST->isWave32();
-
- if (IsWave32) {
- ExecReg = AMDGPU::EXEC_LO;
- MovOp = AMDGPU::S_MOV_B32;
- AndOp = AMDGPU::S_AND_B32;
- OrOp = AMDGPU::S_OR_B32;
- XorOp = AMDGPU::S_XOR_B32;
- AndN2Op = AMDGPU::S_ANDN2_B32;
- OrN2Op = AMDGPU::S_ORN2_B32;
- } else {
- ExecReg = AMDGPU::EXEC;
- MovOp = AMDGPU::S_MOV_B64;
- AndOp = AMDGPU::S_AND_B64;
- OrOp = AMDGPU::S_OR_B64;
- XorOp = AMDGPU::S_XOR_B64;
- AndN2Op = AMDGPU::S_ANDN2_B64;
- OrN2Op = AMDGPU::S_ORN2_B64;
- }
+ Vreg1LoweringHelper Helper(&TheMF, &getAnalysis<MachineDominatorTree>(),
+ &getAnalysis<MachinePostDominatorTree>());
bool Changed = false;
- Changed |= lowerCopiesFromI1();
- Changed |= lowerPhis();
- Changed |= lowerCopiesToI1();
-
- assert(Changed || ConstrainRegs.empty());
- for (unsigned Reg : ConstrainRegs)
- MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
- ConstrainRegs.clear();
-
- return Changed;
+ Changed |= Helper.lowerCopiesFromI1();
+ Changed |= Helper.lowerPhis();
+ Changed |= Helper.lowerCopiesToI1();
+ return Helper.cleanConstrainRegs(Changed);
}
#ifndef NDEBUG
@@ -496,7 +462,7 @@ static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
}
#endif
-bool SILowerI1Copies::lowerCopiesFromI1() {
+bool Vreg1LoweringHelper::lowerCopiesFromI1() {
bool Changed = false;
SmallVector<MachineInstr *, 4> DeadCopies;
@@ -539,23 +505,43 @@ bool SILowerI1Copies::lowerCopiesFromI1() {
return Changed;
}
-bool SILowerI1Copies::lowerPhis() {
+PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
+ MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT)
+ : MF(MF), DT(DT), PDT(PDT) {
+ MRI = &MF->getRegInfo();
+
+ ST = &MF->getSubtarget<GCNSubtarget>();
+ TII = ST->getInstrInfo();
+ IsWave32 = ST->isWave32();
+
+ if (IsWave32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ MovOp = AMDGPU::S_MOV_B32;
+ AndOp = AMDGPU::S_AND_B32;
+ OrOp = AMDGPU::S_OR_B32;
+ XorOp = AMDGPU::S_XOR_B32;
+ AndN2Op = AMDGPU::S_ANDN2_B32;
+ OrN2Op = AMDGPU::S_ORN2_B32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ MovOp = AMDGPU::S_MOV_B64;
+ AndOp = AMDGPU::S_AND_B64;
+ OrOp = AMDGPU::S_OR_B64;
+ XorOp = AMDGPU::S_XOR_B64;
+ AndN2Op = AMDGPU::S_ANDN2_B64;
+ OrN2Op = AMDGPU::S_ORN2_B64;
+ }
+}
+
+bool PhiLoweringHelper::lowerPhis() {
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
PhiIncomingAnalysis PIA(*PDT, TII);
SmallVector<MachineInstr *, 4> Vreg1Phis;
SmallVector<Incoming, 4> Incomings;
-#ifndef NDEBUG
- DenseSet<unsigned> PhiRegisters;
-#endif
-
- for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB.phis()) {
- if (isVreg1(MI.getOperand(0).getReg()))
- Vreg1Phis.push_back(&MI);
- }
- }
+ getCandidatesForLowering(Vreg1Phis);
if (Vreg1Phis.empty())
return false;
@@ -571,28 +557,10 @@ bool SILowerI1Copies::lowerPhis() {
LLVM_DEBUG(dbgs() << "Lower PHI: " << *MI);
Register DstReg = MI->getOperand(0).getReg();
- MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
-
- // Collect incoming values.
- for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
- assert(i + 1 < MI->getNumOperands());
- Register IncomingReg = MI->getOperand(i).getReg();
- MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
- MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
-
- if (IncomingDef->getOpcode() == AMDGPU::COPY) {
- IncomingReg = IncomingDef->getOperand(1).getReg();
- assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
- assert(!IncomingDef->getOperand(1).getSubReg());
- } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
- continue;
- } else {
- assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
- }
+ markAsLaneMask(DstReg);
+ initializeLaneMaskRegisterAttributes(DstReg);
- Incomings.emplace_back(IncomingReg, IncomingMBB, Register{});
- }
+ collectIncomingValuesFromPhi(MI, Incomings);
// Sort the incomings such that incoming values that dominate other incoming
// values are sorted earlier. This allows us to do some amount of on-the-fly
@@ -625,10 +593,11 @@ bool SILowerI1Copies::lowerPhis() {
SSAUpdater.Initialize(DstReg);
if (FoundLoopLevel) {
- LF.addLoopEntries(FoundLoopLevel, SSAUpdater, Incomings);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs,
+ Incomings);
for (auto &Incoming : Incomings) {
- Incoming.UpdatedReg = createLaneMaskReg(*MF);
+ Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg);
}
@@ -644,14 +613,16 @@ bool SILowerI1Copies::lowerPhis() {
PIA.analyze(MBB, Incomings);
for (MachineBasicBlock *MBB : PIA.predecessors())
- SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB));
+ SSAUpdater.AddAvailableValue(
+ MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs));
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
+ constrainIncomingRegisterTakenAsIs(Incoming);
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
} else {
- Incoming.UpdatedReg = createLaneMaskReg(*MF);
+ Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg);
}
}
@@ -669,7 +640,7 @@ bool SILowerI1Copies::lowerPhis() {
Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB);
if (NewReg != DstReg) {
- MRI->replaceRegWith(NewReg, DstReg);
+ replaceDstReg(NewReg, DstReg, &MBB);
MI->eraseFromParent();
}
@@ -678,7 +649,7 @@ bool SILowerI1Copies::lowerPhis() {
return true;
}
-bool SILowerI1Copies::lowerCopiesToI1() {
+bool Vreg1LoweringHelper::lowerCopiesToI1() {
bool Changed = false;
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
@@ -705,8 +676,9 @@ bool SILowerI1Copies::lowerCopiesToI1() {
LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
- MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::SReg_64RegClass);
+ markAsLaneMask(DstReg);
+ initializeLaneMaskRegisterAttributes(DstReg);
+
if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
continue;
@@ -716,7 +688,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
- unsigned TmpReg = createLaneMaskReg(*MF);
+ Register TmpReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
.addReg(SrcReg)
.addImm(0);
@@ -739,7 +711,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
if (FoundLoopLevel) {
SSAUpdater.Initialize(DstReg);
SSAUpdater.AddAvailableValue(&MBB, DstReg);
- LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
+ LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
buildMergeLaneMasks(MBB, MI, DL, DstReg,
SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
@@ -754,7 +726,7 @@ bool SILowerI1Copies::lowerCopiesToI1() {
return Changed;
}
-bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
+bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
const MachineInstr *MI;
for (;;) {
MI = MRI->getUniqueVRegDef(Reg);
@@ -807,7 +779,7 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
/// Return a point at the end of the given \p MBB to insert SALU instructions
/// for lane mask calculation. Take terminators and SCC into account.
MachineBasicBlock::iterator
-SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
+PhiLoweringHelper::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
auto InsertionPt = MBB.getFirstTerminator();
bool TerminatorsUseSCC = false;
for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
@@ -833,10 +805,53 @@ SILowerI1Copies::getSaluInsertionAtEnd(MachineBasicBlock &MBB) const {
llvm_unreachable("SCC used by terminator but no def in block");
}
-void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
- unsigned PrevReg, unsigned CurReg) {
+// VReg_1 -> SReg_32 or SReg_64
+void Vreg1LoweringHelper::markAsLaneMask(Register DstReg) const {
+ MRI->setRegClass(DstReg, ST->getBoolRC());
+}
+
+void Vreg1LoweringHelper::getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB.phis()) {
+ if (isVreg1(MI.getOperand(0).getReg()))
+ Vreg1Phis.push_back(&MI);
+ }
+ }
+}
+
+void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
+ const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
+ for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
+ assert(i + 1 < MI->getNumOperands());
+ Register IncomingReg = MI->getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB();
+ MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg);
+
+ if (IncomingDef->getOpcode() == AMDGPU::COPY) {
+ IncomingReg = IncomingDef->getOperand(1).getReg();
+ assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
+ assert(!IncomingDef->getOperand(1).getSubReg());
+ } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ continue;
+ } else {
+ assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
+ }
+
+ Incomings.emplace_back(IncomingReg, IncomingMBB, Register());
+ }
+}
+
+void Vreg1LoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) {
+ MRI->replaceRegWith(NewReg, OldReg);
+}
+
+void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DstReg, Register PrevReg,
+ Register CurReg) {
bool PrevVal = false;
bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
bool CurVal = false;
@@ -855,13 +870,13 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
return;
}
- unsigned PrevMaskedReg = 0;
- unsigned CurMaskedReg = 0;
+ Register PrevMaskedReg;
+ Register CurMaskedReg;
if (!PrevConstant) {
if (CurConstant && CurVal) {
PrevMaskedReg = PrevReg;
} else {
- PrevMaskedReg = createLaneMaskReg(*MF);
+ PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
.addReg(PrevReg)
.addReg(ExecReg);
@@ -872,7 +887,7 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
if (PrevConstant && PrevVal) {
CurMaskedReg = CurReg;
} else {
- CurMaskedReg = createLaneMaskReg(*MF);
+ CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
.addReg(CurReg)
.addReg(ExecReg);
@@ -895,3 +910,7 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
.addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
}
}
+
+void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
+ return;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
new file mode 100644
index 00000000000000..5099d39c2d1415
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -0,0 +1,97 @@
+//===-- SILowerI1Copies.h --------------------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Except...
[truncated]
|
bool Vreg1LoweringHelper::cleanConstrainRegs(bool Changed) { | ||
assert(Changed || ConstrainRegs.empty()); | ||
for (Register Reg : ConstrainRegs) | ||
MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This probably shouldn't be a register class anymore
…d-gfx:bbaefb59377a Local branch amd-gfx bbaefb5 Manually merged main:3c423722cfd75bf293e42001145fe9ebad53c522 into amd-gfx:52eb0adfd010 Remote branch main d37ced8 AMDGPU: refactor phi lowering from SILowerI1Copies (NFCI) (llvm#75349)
Basic implementation of lane mask merging for GlobalISel. Lane masks on GlobalISel are registers with the sgpr register class and S1 LLT - required by machine uniformity analysis. Implements the equivalent of lowerPhis from SILowerI1Copies.cpp in: patch 1: llvm#75340 patch 2: llvm#75349 patch 3: llvm#80003 patch 4: llvm#78431 patch 5: is in this commit: AMDGPU/GlobalISelDivergenceLowering: constrain incoming registers Previously, in PHIs that represent lane masks, incoming registers taken as-is were not selected as lane masks. Such registers are not merged with another lane mask and most often only have S1 LLT. Implement constrainAsLaneMask by constraining incoming registers taken as-is with lane mask attributes, essentially transforming them into lane masks. This is the final step in having PHI instructions created in this pass be fully instruction-selected.
Basic implementation of lane mask merging for GlobalISel. Lane masks on GlobalISel are registers with the sgpr register class and S1 LLT - required by machine uniformity analysis. Implements the equivalent of lowerPhis from SILowerI1Copies.cpp in: patch 1: #75340 patch 2: #75349 patch 3: #80003 patch 4: #78431 patch 5: is in this commit: AMDGPU/GlobalISelDivergenceLowering: constrain incoming registers Previously, in PHIs that represent lane masks, incoming registers taken as-is were not selected as lane masks. Such registers are not merged with another lane mask and most often only have S1 LLT. Implement constrainAsLaneMask by constraining incoming registers taken as-is with lane mask attributes, essentially transforming them into lane masks. This is the final step in having PHI instructions created in this pass be fully instruction-selected.
Make an abstract class PhiLoweringHelper and expose it for use in the GlobalISel path.
SILowerI1Copies implements PhiLoweringHelper as Vreg1LoweringHelper, which is equivalent to SILowerI1Copies.
A notable change is that createLaneMaskReg now clones attributes from a register that has lane mask attributes, instead of creating a register with the lane mask register class. This is because lane masks have different (more) attributes in GlobalISel.
patch 2 from: #73337