Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 168 additions & 14 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,12 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
///
/// If \p SubregToRegSrcInsts is not empty, we are coalescing a
/// `DstReg = SUBREG_TO_REG SrcReg`, which should introduce an
/// implicit-def of DstReg on instructions that define SrcReg.
void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
ArrayRef<MachineInstr *> SubregToRegSrcInsts = {});

/// If the given machine operand reads only undefined lanes add an undef
/// flag.
Expand Down Expand Up @@ -1444,6 +1449,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,

// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
Expand All @@ -1458,6 +1464,9 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
if (MO.isDef() && MO.getReg().isVirtual() &&
MRI->shouldTrackSubRegLiveness(DstReg))
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}

Expand Down Expand Up @@ -1494,14 +1503,11 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());

// We're only expecting another def of the main output, so the range
// should get updated with the regular output range.
//
// FIXME: The range updating below probably needs updating to look at
// the super register if subranges are tracked.
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
"subrange update for implicit-def of super register may not be "
"properly handled");
// If lanemasks need to be tracked, compile the lanemask of the NewMI
// implicit def operands to avoid subranges for the super-regs from
// being removed by code later on in this function.
if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
Expand Down Expand Up @@ -1617,7 +1623,8 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
*LIS->getSlotIndexes(), *TRI);

for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
if ((SR.LaneMask & DstMask).none() &&
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
Expand Down Expand Up @@ -1891,11 +1898,14 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}

void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
unsigned SubIdx) {
void RegisterCoalescer::updateRegDefsUses(
Register SrcReg, Register DstReg, unsigned SubIdx,
ArrayRef<MachineInstr *> SubregToRegSrcInsts) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);

// Coalescing a COPY may expose reads of 'undef' subregisters.
// If so, then explicitly propagate 'undef' to those operands.
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
if (MO.isUndef())
Expand All @@ -1912,6 +1922,15 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}

// If DstInt already has a subrange for the unused lanes, then we shouldn't
// create duplicate subranges when we update the interval for unused lanes.
LaneBitmask DstIntLaneMask;
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
for (LiveInterval::SubRange &SR : DstInt->subranges())
DstIntLaneMask |= SR.LaneMask;
}

// Go through all instructions to replace uses of 'SrcReg' by 'DstReg'.
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
Expand All @@ -1935,6 +1954,80 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));

bool RequiresImplicitRedef = false;
if (!SubregToRegSrcInsts.empty()) {
// We can only add an implicit-def and undef if the sub registers match,
// e.g.
// %0:gr32 = INSTX
// %0.sub8:gr32 = INSTY // top 24 bits of %0 still defined
// %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub32
//
// This cannot be transformed into:
// %1.sub32:gr64 = INSTX
// undef %1.sub8:gr64 = INSTY , implicit-def %1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it specifically the undef that's a problem here? Could you note that undef here means the subreg write won't read other parts (it's not something I can find well documented, other than as a footnote on some slides: https://llvm.org/devmtg/2017-10/slides/Braun-Welcome%20to%20the%20Back%20End.pdf).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The undef means that the 64-bits of %1 are considered undef before the assigning the .sub8 result, and the implicit-def %1 would mean that the entire contents (not just the low 8bits) would be written. So as a consequence of either/both, the other bits of %1 are not read. Either way, I've updated the wording.

//
// because the undef means that none of the bits of %1 are read, thus
// thrashing the top 24 bits of %1.sub32.
if (is_contained(SubregToRegSrcInsts, UseMI) &&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to use a SmallPtrSet<MachineInstr *> for SubregToRegSrcInsts given it's main use is to check is_contained? That may scale better (given this is checked for each UseMI).

all_of(UseMI->defs(), [&SubIdx](const MachineOperand &MO) -> bool {
if (MO.isUndef())
return true;
return SubIdx && (!MO.getSubReg() || SubIdx == MO.getSubReg());
})) {
// Add implicit-def of super-register to express that the whole
// register is defined by the instruction.
MachineInstrBuilder MIB(*MF, UseMI);
MIB.addReg(DstReg, RegState::ImplicitDefine);
RequiresImplicitRedef = true;
}

// If the coalesed instruction doesn't fully define the register, we need
// to preserve the original super register liveness for SUBREG_TO_REG.
//
// We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
// but it introduces liveness for other subregisters. Downstream users may
// have been relying on those bits, so we need to ensure their liveness is
// captured with a def of other lanes.
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
// First check if there is sufficient granularity in terms of subranges.
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
LaneBitmask UnusedLanes = DstMask & ~UsedLanes;
if ((UnusedLanes & ~DstIntLaneMask).any()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
DstIntLaneMask |= UnusedLanes;
}

// After duplicating the live ranges for the low/hi bits, we
// need to update the subranges of the DstReg interval such that
// for a case like this:
//
// entry:
// 16B %1:gpr32 = INSTRUCTION (<=> UseMI)
// :
// if.then:
// 32B %1:gpr32 = MOVIMM32 ..
// 48B %0:gpr64 = SUBREG_TO_REG 0, %1, sub32
Comment on lines +2006 to +2011
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming in this example %1 is being coalesced to directly write to %0? However, I'm not sure what the relevance of the entry/if.then basic blocks is? Would this be the same if everything was in one block.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that's right. The example of the values being defined in different blocks came from a reproducer, but the same might also happen in the one block.

//
// Only the MOVIMM32 requires a def of the top lanes and any intervals
// for the top 32-bits of the def at 16B should be removed.
for (LiveInterval::SubRange &SR : DstInt->subranges()) {
if (!Writes || RequiresImplicitRedef ||
(SR.LaneMask & UnusedLanes).none())
continue;

assert((SR.LaneMask & UnusedLanes) == SR.LaneMask &&
"Unexpected lanemask. Subrange needs finer granularity");

SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot();
auto SegmentI = SR.find(UseIdx);
if (SegmentI != SR.end())
SR.removeSegment(SegmentI, true);
}
}
}

// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
Expand All @@ -1943,7 +2036,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// turn a full def into a read-modify-write sub-register def and vice
// versa.
if (SubIdx && MO.isDef())
MO.setIsUndef(!Reads);
MO.setIsUndef(!Reads || RequiresImplicitRedef);

// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
Expand Down Expand Up @@ -2046,6 +2139,38 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
LIS->shrinkToUses(&LI);
}

/// For a given use of value \p Idx, it returns the def in the current block,
/// or otherwise all possible defs in preceding blocks.
static bool findPrecedingDefs(SmallVector<MachineInstr *> &Instrs,
LiveIntervals *LIS, LiveInterval &SrcInt,
MachineBasicBlock *MBB, VNInfo *Idx) {
auto IsPrecedingDef = [&](VNInfo *Idx) -> bool {
if (Idx->isPHIDef())
return false;
MachineInstr *Def = LIS->getInstructionFromIndex(Idx->def);
assert(Def && "Unable to find a def for SUBREG_TO_REG source operand");
Instrs.push_back(Def);
return true;
};

if (IsPrecedingDef(Idx))
return true;

SmallVector<MachineBasicBlock *> Worklist(MBB->pred_begin(), MBB->pred_end());
SmallPtrSet<MachineBasicBlock *, 8> VisitedBlocks;
while (!Worklist.empty()) {
MachineBasicBlock *MBB = Worklist.pop_back_val();
auto [_, Inserted] = VisitedBlocks.insert(MBB);
if (!Inserted)
continue;
VNInfo *Idx = SrcInt.getVNInfoBefore(LIS->getMBBEndIdx(MBB));
if (!IsPrecedingDef(Idx))
Worklist.append(MBB->pred_begin(), MBB->pred_end());
}

return !Instrs.empty();
}

bool RegisterCoalescer::joinCopy(
MachineInstr *CopyMI, bool &Again,
SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
Expand Down Expand Up @@ -2183,6 +2308,34 @@ bool RegisterCoalescer::joinCopy(
});
}

SmallVector<MachineInstr *> SubregToRegSrcInsts;
Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
if (CopyMI->isSubregToReg() && !SrcReg.isPhysical()) {
// For the case where the copy instruction is a SUBREG_TO_REG, e.g.
//
// %0:gpr32 = movimm32 ..
// %1:gpr64 = SUBREG_TO_REG 0, %0, sub32
// ...
// %0:gpr32 = COPY <something>
//
// After joining liveranges, the original `movimm32` will need an
// implicit-def to make it explicit that the entire register is written,
// i.e.
//
// undef %0.sub32:gpr64 = movimm32 ..., implicit-def %0
// ...
// undef %0.sub32:gpr64 = COPY <something> // Note that this does not
// // require an implicit-def,
// // because it has nothing to
// // do with the SUBREG_TO_REG.
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex SubregToRegSlotIdx = LIS->getInstructionIndex(*CopyMI);
if (!findPrecedingDefs(SubregToRegSrcInsts, LIS, SrcInt,
CopyMI->getParent(),
SrcInt.Query(SubregToRegSlotIdx).valueIn()))
llvm_unreachable("SUBREG_TO_REG src requires a def");
}

ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;

Expand Down Expand Up @@ -2253,7 +2406,8 @@ bool RegisterCoalescer::joinCopy(
// Also update DstReg operands to include DstIdx if it is set.
if (CP.getDstIdx())
updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
SubregToRegSrcInsts);

// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SplitKit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
assert(DefMI != nullptr);
LaneBitmask LM;
for (const MachineOperand &DefOp : DefMI->defs()) {
for (const MachineOperand &DefOp : DefMI->all_defs()) {
Register R = DefOp.getReg();
if (R != LI.reg())
continue;
Expand Down
Loading
Loading