-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" #168353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" #168353
Changes from all commits
73b490d
7228ceb
9478968
aebc086
78746b7
5e56c6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -307,7 +307,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { | |
| /// number if it is not zero. If DstReg is a physical register and the | ||
| /// existing subregister number of the def / use being updated is not zero, | ||
| /// make sure to set it to the correct physical subregister. | ||
| void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); | ||
| /// | ||
| /// If \p SubregToRegSrcInsts is not empty, we are coalescing a | ||
| /// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an | ||
| /// implicit-def of DstReg on instructions that define SrcReg. | ||
| void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, | ||
| ArrayRef<MachineInstr *> SubregToRegSrcInsts = {}); | ||
|
|
||
| /// If the given machine operand reads only undefined lanes add an undef | ||
| /// flag. | ||
|
|
@@ -1444,6 +1449,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, | |
|
|
||
| // CopyMI may have implicit operands, save them so that we can transfer them | ||
| // over to the newly materialized instruction after CopyMI is removed. | ||
| LaneBitmask NewMIImplicitOpsMask; | ||
| SmallVector<MachineOperand, 4> ImplicitOps; | ||
| ImplicitOps.reserve(CopyMI->getNumOperands() - | ||
| CopyMI->getDesc().getNumOperands()); | ||
|
|
@@ -1458,6 +1464,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, | |
| (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && | ||
| "unexpected implicit virtual register def"); | ||
| ImplicitOps.push_back(MO); | ||
| if (MO.isDef() && MO.getReg().isVirtual() && | ||
| MRI->shouldTrackSubRegLiveness(DstReg)) | ||
| NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1494,14 +1503,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, | |
| } else { | ||
| assert(MO.getReg() == NewMI.getOperand(0).getReg()); | ||
|
|
||
| // We're only expecting another def of the main output, so the range | ||
| // should get updated with the regular output range. | ||
| // | ||
| // FIXME: The range updating below probably needs updating to look at | ||
| // the super register if subranges are tracked. | ||
| assert(!MRI->shouldTrackSubRegLiveness(DstReg) && | ||
| "subrange update for implicit-def of super register may not be " | ||
| "properly handled"); | ||
| // If lanemasks need to be tracked, compile the lanemask of the NewMI | ||
| // implicit def operands to avoid subranges for the super-regs from | ||
| // being removed by code later on in this function. | ||
| if (MRI->shouldTrackSubRegLiveness(MO.getReg())) | ||
| NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -1617,7 +1623,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP, | |
| *LIS->getSlotIndexes(), *TRI); | ||
|
|
||
| for (LiveInterval::SubRange &SR : DstInt.subranges()) { | ||
| if ((SR.LaneMask & DstMask).none()) { | ||
| if ((SR.LaneMask & DstMask).none() && | ||
| (SR.LaneMask & NewMIImplicitOpsMask).none()) { | ||
| LLVM_DEBUG(dbgs() | ||
| << "Removing undefined SubRange " | ||
| << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); | ||
|
|
@@ -1891,11 +1898,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, | |
| } | ||
| } | ||
|
|
||
| void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, | ||
| unsigned SubIdx) { | ||
| void RegisterCoalescer::updateRegDefsUses( | ||
| Register SrcReg, Register DstReg, unsigned SubIdx, | ||
| ArrayRef<MachineInstr *> SubregToRegSrcInsts) { | ||
| bool DstIsPhys = DstReg.isPhysical(); | ||
| LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); | ||
|
|
||
| // Coalescing a COPY may expose reads of 'undef' subregisters. | ||
| // If so, then explicitly propagate 'undef' to those operands. | ||
| if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { | ||
| for (MachineOperand &MO : MRI->reg_operands(DstReg)) { | ||
| if (MO.isUndef()) | ||
|
|
@@ -1912,6 +1922,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, | |
| } | ||
| } | ||
|
|
||
| // If DstInt already has a subrange for the unused lanes, then we shouldn't | ||
| // create duplicate subranges when we update the interval for unused lanes. | ||
| LaneBitmask DstIntLaneMask; | ||
| if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { | ||
| for (LiveInterval::SubRange &SR : DstInt->subranges()) | ||
| DstIntLaneMask |= SR.LaneMask; | ||
| } | ||
|
|
||
| // Go through all instructions to replace uses of 'SrcReg' by 'DstReg'. | ||
| SmallPtrSet<MachineInstr *, 8> Visited; | ||
| for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), | ||
| E = MRI->reg_instr_end(); | ||
|
|
@@ -1935,6 +1954,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, | |
| if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) | ||
| Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); | ||
|
|
||
| bool RequiresImplicitRedef = false; | ||
| if (!SubregToRegSrcInsts.empty()) { | ||
| // We can only add an implicit-def and undef if the sub registers match, | ||
| // e.g. | ||
| // %0:gr32 = INSTX | ||
| // %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined | ||
| // %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32 | ||
| // | ||
| // This cannot be transformed into: | ||
| // %1.sub32:gr64 = INSTX | ||
| // undef %1.sub8:gr64 = INSTY , implicit-def %1 | ||
| // | ||
| // because the undef means that none of the bits of %1 are read, thus | ||
| // thrashing the top 24 bits of %1.sub32. | ||
| if (is_contained(SubregToRegSrcInsts, UseMI) && | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to use a |
||
| all_of(UseMI->defs(), [&SubIdx](const MachineOperand &MO) -> bool { | ||
| if (MO.isUndef()) | ||
| return true; | ||
| return SubIdx && (!MO.getSubReg() || SubIdx == MO.getSubReg()); | ||
| })) { | ||
| // Add implicit-def of super-register to express that the whole | ||
| // register is defined by the instruction. | ||
| MachineInstrBuilder MIB(*MF, UseMI); | ||
| MIB.addReg(DstReg, RegState::ImplicitDefine); | ||
| RequiresImplicitRedef = true; | ||
| } | ||
|
|
||
| // If the coalesed instruction doesn't fully define the register, we need | ||
| // to preserve the original super register liveness for SUBREG_TO_REG. | ||
| // | ||
| // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, | ||
| // but it introduces liveness for other subregisters. Downstream users may | ||
| // have been relying on those bits, so we need to ensure their liveness is | ||
| // captured with a def of other lanes. | ||
| if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { | ||
| // First check if there is sufficient granularity in terms of subranges. | ||
| LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); | ||
| LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); | ||
| LaneBitmask UnusedLanes = DstMask & ~UsedLanes; | ||
| if ((UnusedLanes & ~DstIntLaneMask).any()) { | ||
| BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); | ||
| DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); | ||
| DstIntLaneMask |= UnusedLanes; | ||
| } | ||
|
|
||
| // After duplicating the live ranges for the low/hi bits, we | ||
| // need to update the subranges of the DstReg interval such that | ||
| // for a case like this: | ||
| // | ||
| // entry: | ||
| // 16B %1:gpr32 = INSTRUCTION (<=> UseMI) | ||
| // : | ||
| // if.then: | ||
| // 32B %1:gpr32 = MOVIMM32 .. | ||
| // 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32 | ||
|
Comment on lines
+2006
to
+2011
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming in this example
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes that's right. The example of the values being defined in different blocks came from a reproducer, but the same might also happen in the one block. |
||
| // | ||
| // Only the MOVIMM32 requires a def of the top lanes and any intervals | ||
| // for the top 32-bits of the def at 16B should be removed. | ||
| for (LiveInterval::SubRange &SR : DstInt->subranges()) { | ||
| if (!Writes || RequiresImplicitRedef || | ||
| (SR.LaneMask & UnusedLanes).none()) | ||
| continue; | ||
|
|
||
| assert((SR.LaneMask & UnusedLanes) == SR.LaneMask && | ||
| "Unexpected lanemask. Subrange needs finer granularity"); | ||
|
|
||
| SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(); | ||
| auto SegmentI = SR.find(UseIdx); | ||
| if (SegmentI != SR.end()) | ||
| SR.removeSegment(SegmentI, true); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Replace SrcReg with DstReg in all UseMI operands. | ||
| for (unsigned Op : Ops) { | ||
| MachineOperand &MO = UseMI->getOperand(Op); | ||
|
|
@@ -1943,7 +2036,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, | |
| // turn a full def into a read-modify-write sub-register def and vice | ||
| // versa. | ||
| if (SubIdx && MO.isDef()) | ||
| MO.setIsUndef(!Reads); | ||
| MO.setIsUndef(!Reads || RequiresImplicitRedef); | ||
|
|
||
| // A subreg use of a partially undef (super) register may be a complete | ||
| // undef use now and then has to be marked that way. | ||
|
|
@@ -2046,6 +2139,38 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, | |
| LIS->shrinkToUses(&LI); | ||
| } | ||
|
|
||
| /// For a given use of value \p Idx, it returns the def in the current block, | ||
| /// or otherwise all possible defs in preceding blocks. | ||
| static bool findPrecedingDefs(SmallVector<MachineInstr *> &Instrs, | ||
| LiveIntervals *LIS, LiveInterval &SrcInt, | ||
| MachineBasicBlock *MBB, VNInfo *Idx) { | ||
| auto IsPrecedingDef = [&](VNInfo *Idx) -> bool { | ||
| if (Idx->isPHIDef()) | ||
| return false; | ||
| MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def); | ||
| assert(Def && "Unable to find a def for SUBREG_TO_REG source operand"); | ||
| Instrs.push_back(Def); | ||
| return true; | ||
| }; | ||
|
|
||
| if (IsPrecedingDef(Idx)) | ||
| return true; | ||
|
|
||
| SmallVector<MachineBasicBlock *> Worklist(MBB->pred_begin(), MBB->pred_end()); | ||
| SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks; | ||
| while (!Worklist.empty()) { | ||
| MachineBasicBlock *MBB = Worklist.pop_back_val(); | ||
| auto [_, Inserted] = VisitedBlocks.insert(MBB); | ||
| if (!Inserted) | ||
| continue; | ||
| VNInfo *Idx = SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(MBB)); | ||
| if (!IsPrecedingDef(Idx)) | ||
| Worklist.append(MBB->pred_begin(), MBB->pred_end()); | ||
| } | ||
|
|
||
| return !Instrs.empty(); | ||
| } | ||
|
|
||
| bool RegisterCoalescer::joinCopy( | ||
| MachineInstr *CopyMI, bool &Again, | ||
| SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) { | ||
|
|
@@ -2183,6 +2308,34 @@ bool RegisterCoalescer::joinCopy( | |
| }); | ||
| } | ||
|
|
||
| SmallVector<MachineInstr *> SubregToRegSrcInsts; | ||
| Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); | ||
| if (CopyMI->isSubregToReg() && !SrcReg.isPhysical()) { | ||
| // For the case where the copy instruction is a SUBREG_TO_REG, e.g. | ||
| // | ||
| // %0:gpr32 = movimm32 .. | ||
| // %1:gpr64 = SUBREG_TO_REG 0, %0, sub32 | ||
| // ... | ||
| // %0:gpr32 = COPY <something> | ||
| // | ||
| // After joining liveranges, the original `movimm32` will need an | ||
| // implicit-def to make it explicit that the entire register is written, | ||
| // i.e. | ||
| // | ||
| // undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0 | ||
| // ... | ||
| // undef %0.sub32:gpr64 = COPY <something> // Note that this does not | ||
| // // require an implicit-def, | ||
| // // because it has nothing to | ||
| // // do with the SUBREG_TO_REG. | ||
| LiveInterval &SrcInt = LIS->getInterval(SrcReg); | ||
| SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI); | ||
| if (!findPrecedingDefs(SubregToRegSrcInsts, LIS, SrcInt, | ||
| CopyMI->getParent(), | ||
| SrcInt.Query(SubregToRegSlotIdx).valueIn())) | ||
| llvm_unreachable("SUBREG_TO_REG src requires a def"); | ||
| } | ||
|
|
||
| ShrinkMask = LaneBitmask::getNone(); | ||
| ShrinkMainRange = false; | ||
|
|
||
|
|
@@ -2253,7 +2406,8 @@ bool RegisterCoalescer::joinCopy( | |
| // Also update DstReg operands to include DstIdx if it is set. | ||
| if (CP.getDstIdx()) | ||
| updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); | ||
| updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); | ||
| updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), | ||
| SubregToRegSrcInsts); | ||
|
|
||
| // Shrink subregister ranges if necessary. | ||
| if (ShrinkMask.any()) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it specifically the
undefthat's a problem here? Could you note thatundefhere means the subreg write won't read other parts (it's not something I can find well documented, other than as a footnote on some slides: https://llvm.org/devmtg/2017-10/slides/Braun-Welcome%20to%20the%20Back%20End.pdf).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The undef means that the 64-bits of %1 are considered
undefbefore the assigning the.sub8result, and theimplicit-def %1would mean that the entire contents (not just the low 8bits) would be written. So as a consequence of either/both, the other bits of%1are not read. Either way, I've updated the wording.