-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[WIP][CodeGen] Encode liveness for COPY instructions after virtRegRewriter pass. #151123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[WIP][CodeGen] Encode liveness for COPY instructions after virtRegRewriter pass. #151123
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-regalloc Author: Vikash Gupta (vg0204) ChangesAs for the Greedy RA, the virtRegRewriter pass is the last place that holds livenes info, even at subregister level. So, now that information can be extracted and encoded on COPY instruction. This information for COPY can later be used to identify partially live regsiters precisely, assuming the liveness information used is not invalidated by any kind if IR muatation later. It enables us to stop using implict & implicit-def operands while expandinf the copies later in COpyPhysReg. NOTE: Currently, as we do not support any MachineOperand or MachineOperand attribute( associated with Register operand) that could hold liveness information as the 64-bit laneBitmask, so just for sake of EXPERIMENTATION, encoded this info as an immediate 64-bit unsigned integer. Full diff: https://github.com/llvm/llvm-project/pull/151123.diff 2 Files Affected:
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 4c83f8a580aa0..1f125c2cf87de 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1323,7 +1323,7 @@ def REG_SEQUENCE : StandardPseudoInstruction {
}
def COPY : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
- let InOperandList = (ins unknown:$src);
+ let InOperandList = (ins unknown:$src, variable_ops);
let AsmString = "";
let hasSideEffects = false;
let isAsCheapAsAMove = true;
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 99ba893d6f096..227c0ae813934 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -213,6 +213,8 @@ class VirtRegRewriter {
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
+ uint64_t calcLiveRegUnitMask(const MachineOperand &MO,
+ MCRegister PhysReg) const;
void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
void handleIdentityCopy(MachineInstr &MI);
void expandCopyBundle(MachineInstr &MI) const;
@@ -474,6 +476,77 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
return true;
}
+// Return LaneBitmask value as unint64_t for PhysReg assigned to MO,
+// representing its live register units at its parent MI. In case of undef or
+// fully live MO, return 0u.
+uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
+ MCRegister PhysReg) const {
+ Register Reg = MO.getReg();
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = SubRegIdx
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
+ : LaneBitmask::getNone());
+
+ LaneBitmask LiveRegUnitMask;
+ DenseSet<unsigned> LiveRegUnits;
+
+ // dbgs() << "\n********** " << printReg(Reg, TRI) << "[ " <<
+ // printReg(PhysReg, TRI) << " ]" << " **********\n";
+
+ if (MO.isUndef())
+ return 0u;
+
+ assert(LI.liveAt(MIIndex) &&
+ "Reads of completely dead register should be marked undef already");
+
+ if (LI.hasSubRanges()) {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ LaneBitmask Mask = (*Units).second;
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & UseMask & Mask).any() && S.liveAt(MIIndex)) {
+ LiveRegUnits.insert(Unit);
+ }
+ }
+ }
+ } else {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+ LaneBitmask Mask = (*Units).second;
+
+ if (UnitRange.liveAt(MIIndex) && (UseMask & Mask).any())
+ LiveRegUnits.insert(Unit);
+ }
+ }
+
+ // Consider the exact subregister & create new UseMask as per the RC for it.
+ if (SubRegIdx != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
+ UseMask = (TRI->getMinimalPhysRegClass(PhysReg))->getLaneMask();
+ }
+
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ LaneBitmask Mask = (*Units).second;
+ if (LiveRegUnits.count(Unit)) {
+ // dbgs() << "LIVE DEF UNIT : " << printRegUnit(Unit, TRI) << '\n';
+ LiveRegUnitMask |= Mask;
+ }
+ }
+
+ // dbgs() << "UseMask : " << PrintLaneMask(UseMask) << '\n';
+ // dbgs() << "LiveRegUnitMask : " << PrintLaneMask(LiveRegUnitMask) << '\n';
+ if (UseMask == LiveRegUnitMask)
+ return 0u;
+
+ return LiveRegUnitMask.getAsInteger();
+}
+
void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
if (!MI.isIdentityCopy())
return;
@@ -495,7 +568,11 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
- if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
+
+ // Avoid COPY with an exact 3 operand, wiith third operand be Mask, as
+ // it same as a COPY with no additional liveness information.
+ if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 3 ||
+ (MI.getNumOperands() == 3 && !MI.getOperand(2).isImm())) {
MI.setDesc(TII->get(TargetOpcode::KILL));
LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;
@@ -641,11 +718,14 @@ void VirtRegRewriter::rewrite() {
SmallVector<Register, 8> SuperDeads;
SmallVector<Register, 8> SuperDefs;
SmallVector<Register, 8> SuperKills;
+ uint64_t Mask;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+ // reset for each MI.
+ Mask = 0u;
for (MachineOperand &MO : MI.operands()) {
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask())
@@ -663,6 +743,9 @@ void VirtRegRewriter::rewrite() {
RewriteRegs.insert(PhysReg);
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+ if (MO.isUse() && MI.isCopy())
+ Mask = calcLiveRegUnitMask(MO, PhysReg);
+
// Preserve semantics of sub-register operands.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
@@ -739,6 +822,10 @@ void VirtRegRewriter::rewrite() {
MO.setIsRenamable(true);
}
+ // Add LaneBitmask as MO_Imm
+ if (MI.isCopy() && Mask)
+ MI.addOperand(*MF, MachineOperand::CreateImm(Mask));
+
// Add any missing super-register kills after rewriting the whole
// instruction.
while (!SuperKills.empty())
|
60ef9fa
to
08d44d4
Compare
As for the Greedy RA, the virtRegRewriter pass is the last place that holds livenes info, even at subregister level. So, now that information can be extracted and encoded on COPY instruction. This information for COPY can later be used to identify partially live regsiters precisely, assuming the liveness information used is not invalidated by any kind if IR muatation later.
08d44d4
to
8cb9a5e
Compare
It would be trivial to add an MO_LaneMask |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've been thinking of introducing a COPY for a lanemask as a substitute for the copy bundles splitkit produces, but it should be a separate opcode instead of acting like an implicit operand on COPY.
I think you should introduce the new operand type first, then the opcode separately from touching VirtRegMap
@@ -1323,7 +1323,7 @@ def REG_SEQUENCE : StandardPseudoInstruction { | |||
} | |||
def COPY : StandardPseudoInstruction { | |||
let OutOperandList = (outs unknown:$dst); | |||
let InOperandList = (ins unknown:$src); | |||
let InOperandList = (ins unknown:$src, variable_ops); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should not change the standard copy instruction
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Its just for the experimentation purpose, as didn't know exactly where to encode the liveness info as laneMask while working on it! Will go with your suggestion for MO_laneMask now!
Sure, could you review the next PR related to it (#151124 ), dealing with using this livenewss to expand COPYs! |
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This patch changes the definition of the COPY instruction. It needs the attention of a wider audience. You should include some folks from the community who review the CodGen changes.
// Return LaneBitmask value as uint64_t for PhysReg assigned to MO, | ||
// representing its live register units at its parent MI. In case of undef or | ||
// fully live MO, return 0u. | ||
uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why don't you return LaneBitmask as it is?
This can be later converted to uint64 value using getAsInteger()
whenever required.
@@ -641,11 +718,14 @@ void VirtRegRewriter::rewrite() { | |||
SmallVector<Register, 8> SuperDeads; | |||
SmallVector<Register, 8> SuperDefs; | |||
SmallVector<Register, 8> SuperKills; | |||
uint64_t Mask; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Again, declare it as LaneBitmask.
|
||
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); | ||
MBBI != MBBE; ++MBBI) { | ||
LLVM_DEBUG(MBBI->print(dbgs(), Indexes)); | ||
for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) { | ||
// reset for each MI. | ||
Mask = 0u; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use LaneBitmask::getNone() to clear all bits.
@@ -739,6 +822,10 @@ void VirtRegRewriter::rewrite() { | |||
MO.setIsRenamable(true); | |||
} | |||
|
|||
// Add LaneBitmask as MO_Imm |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Terminate all single-line comments with a period.
Do not include any links in the upstream PRs that can't be accessible to all. This internal referencing is unwanted. Instead, explain the actual problem here. |
It can have the tag as a Fixes SWDEV-xxx |
As for the Greedy RA, the virtRegRewriter pass is the last place that holds livenes info, even at subregister level. So, now that information can be extracted and encoded on COPY instruction. This is in reference to SWDEV-498533
This information for COPY can later be used to identify partially live regsiters precisely, assuming the liveness information used is not invalidated by any kind if IR muatation later[#151124 ]. It enables us to stop using implict & implicit-def operands while expandinf the copies later in COpyPhysReg.
NOTE: Currently, as we do not support any MachineOperand or MachineOperand attribute( associated with Register operand) that could hold liveness information as the 64-bit laneBitmask, so just for sake of EXPERIMENTATION, encoded this info as an immediate 64-bit unsigned integer.