Skip to content

Commit

Permalink
Improve handling of COPY instructions with identical value numbers
Browse files Browse the repository at this point in the history
Testcases provided by Tim Renouf.

Differential Revision: https://reviews.llvm.org/D48102

llvm-svn: 335472
  • Loading branch information
Krzysztof Parzyszek committed Jun 25, 2018
1 parent 2f5ca59 commit 4581f37
Show file tree
Hide file tree
Showing 8 changed files with 1,131 additions and 28 deletions.
154 changes: 126 additions & 28 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,11 @@ namespace {
void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
MachineOperand &MO, unsigned SubRegIdx);

/// Handle copies of undef values.
/// Returns true if @p CopyMI was a copy of an undef value and eliminated.
bool eliminateUndefCopy(MachineInstr *CopyMI);
/// Handle copies of undef values. If the undef value is an incoming
/// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
/// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
/// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);

/// Check whether or not we should apply the terminal rule on the
/// destination (Dst) of \p Copy.
Expand Down Expand Up @@ -1367,7 +1369,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
return true;
}

bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// ProcessImplicitDefs may leave some copies of <undef> values, it only
// removes local variables. When we have a copy like:
//
Expand All @@ -1391,16 +1393,34 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
if ((SR.LaneMask & SrcMask).none())
continue;
if (SR.liveAt(Idx))
return false;
return nullptr;
}
} else if (SrcLI.liveAt(Idx))
return false;

LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
return nullptr;

// Remove any DstReg segments starting at the instruction.
// If the undef copy defines a live-out value (i.e. an input to a PHI def),
// then replace it with an IMPLICIT_DEF.
LiveInterval &DstLI = LIS->getInterval(DstReg);
SlotIndex RegIndex = Idx.getRegSlot();
LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
assert(Seg != nullptr && "No segment for defining instruction");
if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
if (V->isPHIDef()) {
CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
MachineOperand &MO = CopyMI->getOperand(i-1);
if (MO.isReg() && MO.isUse())
CopyMI->RemoveOperand(i-1);
}
LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
"implicit def\n");
return CopyMI;
}
}

// Remove any DstReg segments starting at the instruction.
LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");

// Remove value or merge with previous one in case of a subregister def.
if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
Expand Down Expand Up @@ -1456,7 +1476,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
MO.setIsUndef(true);
LIS->shrinkToUses(&DstLI);

return true;
return CopyMI;
}

void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
Expand Down Expand Up @@ -1622,9 +1642,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}

// Eliminate undefs.
if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
deleteInstr(CopyMI);
return false; // Not coalescable.
if (!CP.isPhys()) {
// If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
if (UndefMI->isImplicitDef())
return false;
deleteInstr(CopyMI);
return false; // Not coalescable.
}
}

// Coalesced copies are normally removed immediately, but transformations
Expand Down Expand Up @@ -2078,6 +2103,13 @@ class JoinVals {
/// True once Pruned above has been computed.
bool PrunedComputed = false;

/// True if this value is determined to be identical to OtherVNI
/// (in valuesIdentical). This is used with CR_Erase where the erased
/// copy is redundant, i.e. the source value is already the same as
/// the destination. In such cases the subranges need to be updated
/// properly. See comment at pruneSubRegValues for more info.
bool Identical = false;

Val() = default;

bool isAnalyzed() const { return WriteLanes.any(); }
Expand Down Expand Up @@ -2212,17 +2244,17 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)

std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
const VNInfo *VNI) const {
unsigned Reg = this->Reg;
unsigned TrackReg = Reg;

while (!VNI->isPHIDef()) {
SlotIndex Def = VNI->def;
MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
assert(MI && "No defining instruction");
if (!MI->isFullCopy())
return std::make_pair(VNI, Reg);
return std::make_pair(VNI, TrackReg);
unsigned SrcReg = MI->getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
return std::make_pair(VNI, Reg);
return std::make_pair(VNI, TrackReg);

const LiveInterval &LI = LIS->getInterval(SrcReg);
const VNInfo *ValueIn;
Expand All @@ -2243,25 +2275,37 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
break;
}
}
if (ValueIn == nullptr)
break;
if (ValueIn == nullptr) {
// Reaching an undefined value is legitimate, for example:
//
// 1 undef %0.sub1 = ... ;; %0.sub0 == undef
// 2 %1 = COPY %0 ;; %1 is defined here.
// 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
// ;; but it's equivalent to "undef".
return std::make_pair(nullptr, SrcReg);
}
VNI = ValueIn;
Reg = SrcReg;
TrackReg = SrcReg;
}
return std::make_pair(VNI, Reg);
return std::make_pair(VNI, TrackReg);
}

bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
const JoinVals &Other) const {
const VNInfo *Orig0;
unsigned Reg0;
std::tie(Orig0, Reg0) = followCopyChain(Value0);
if (Orig0 == Value1)
if (Orig0 == Value1 && Reg0 == Other.Reg)
return true;

const VNInfo *Orig1;
unsigned Reg1;
std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
// If both values are undefined, and the source registers are the same
// register, the values are identical. Filter out cases where only one
// value is defined.
if (Orig0 == nullptr || Orig1 == nullptr)
return Orig0 == Orig1 && Reg0 == Reg1;

// The values are equal if they are defined at the same place and use the
// same register. Note that we cannot compare VNInfos directly as some of
Expand Down Expand Up @@ -2437,9 +2481,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// %other = COPY %ext
// %this = COPY %ext <-- Erase this copy
//
if (DefMI->isFullCopy() && !CP.isPartial()
&& valuesIdentical(VNI, V.OtherVNI, Other))
if (DefMI->isFullCopy() && !CP.isPartial() &&
valuesIdentical(VNI, V.OtherVNI, Other)) {
V.Identical = true;
return CR_Erase;
}

// If the lanes written by this instruction were all undef in OtherVNI, it is
// still safe to join the live ranges. This can't be done with a simple value
Expand Down Expand Up @@ -2743,19 +2789,62 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}

/// Consider the following situation when coalescing the copy between
/// %31 and %45 at 800. (The vertical lines represent live range segments.)
///
/// Main range Subrange 0004 (sub2)
/// %31 %45 %31 %45
/// 544 %45 = COPY %28 + +
/// | v1 | v1
/// 560B bb.1: + +
/// 624 = %45.sub2 | v2 | v2
/// 800 %31 = COPY %45 + + + +
/// | v0 | v0
/// 816 %31.sub1 = ... + |
/// 880 %30 = COPY %31 | v1 +
/// 928 %45 = COPY %30 | + +
/// | | v0 | v0 <--+
/// 992B ; backedge -> bb.1 | + + |
/// 1040 = %31.sub0 + |
/// This value must remain
/// live-out!
///
/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
/// redundant, since it copies the value from %45 back into it. The
/// conflict resolution for the main range determines that %45.v0 is
/// to be erased, which is ok since %31.v1 is identical to it.
/// The problem happens with the subrange for sub2: it has to be live
/// on exit from the block, but since 928 was actually a point of
/// definition of %45.sub2, %45.sub2 was not live immediately prior
/// to that definition. As a result, when 928 was erased, the value v0
/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
/// providing an incorrect value to the use at 624.
///
/// Since the main-range values %31.v1 and %45.v0 were proved to be
/// identical, the corresponding values in subranges must also be the
/// same. A redundant copy is removed because it's not needed, and not
/// because it copied an undefined value, so any liveness that originated
/// from that copy cannot disappear. When pruning a value that started
/// at the removed copy, the corresponding identical value must be
/// extended to replace it.
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Look for values being erased.
bool DidPrune = false;
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
Val &V = Vals[i];
// We should trigger in all cases in which eraseInstrs() does something.
// match what eraseInstrs() is doing, print a message so
if (Vals[i].Resolution != CR_Erase &&
(Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef ||
!Vals[i].Pruned))
if (V.Resolution != CR_Erase &&
(V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
continue;

// Check subranges at the point where the copy will be removed.
SlotIndex Def = LR.getValNumInfo(i)->def;
SlotIndex OtherDef;
if (V.Identical)
OtherDef = V.OtherVNI->def;

// Print message so mismatches with eraseInstrs() can be diagnosed.
LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
<< '\n');
Expand All @@ -2768,10 +2857,18 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
if (ValueOut != nullptr && Q.valueIn() == nullptr) {
LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
<< " at " << Def << "\n");
LIS->pruneValue(S, Def, nullptr);
SmallVector<SlotIndex,8> EndPoints;
LIS->pruneValue(S, Def, &EndPoints);
DidPrune = true;
// Mark value number as unused.
ValueOut->markUnused();

if (V.Identical && S.Query(OtherDef).valueOut()) {
// If V is identical to V.OtherVNI (and S was live at OtherDef),
// then we can't simply prune V from S. V needs to be replaced
// with V.OtherVNI.
LIS->extendToIndices(S, EndPoints);
}
continue;
}
// If a subrange ends at the copy, then a value was copied but only
Expand Down Expand Up @@ -2964,7 +3061,8 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
NewVNInfo);

LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
<< ' ' << LRange << "\n");
if (EndPoints.empty())
return;

Expand Down
116 changes: 116 additions & 0 deletions llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -run-pass=simple-register-coalescing -o - %s | FileCheck -check-prefix=GCN %s

# GCN: {{^body}}

---
name: foo
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.2
%0:sreg_32_xm0 = S_MOV_B32 1
%1:vgpr_32 = COPY %0
INLINEASM &"; %1", 1, 327690, def %1, 2147483657, %1(tied-def 3)
%2:sreg_64 = V_CMP_NE_U32_e64 0, %1, implicit $exec
undef %3.sub0:sreg_128 = COPY %0
%3.sub1:sreg_128 = COPY %0
%3.sub2:sreg_128 = COPY %0
%4:sreg_128 = COPY %3
%5:vgpr_32 = V_MOV_B32_e32 -64, implicit $exec
%6:vreg_128 = COPY %4
%7:sreg_32_xm0 = S_AND_B32 target-flags(amdgpu-gotprel) 1, %2.sub0, implicit-def dead $scc
%8:sreg_32_xm0 = S_MOV_B32 0
%9:vgpr_32 = COPY %5
%10:vreg_128 = COPY %6
S_BRANCH %bb.2
bb.1:
%11:vgpr_32 = V_OR_B32_e32 %12.sub0, %12.sub1, implicit $exec
%13:vgpr_32 = V_OR_B32_e32 %11, %12.sub2, implicit $exec
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %15, implicit $exec
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM
bb.2:
successors: %bb.3, %bb.4
%19:sreg_64 = V_CMP_EQ_U32_e64 1, %7, implicit $exec
%20:sreg_64 = COPY $exec, implicit-def $exec
%21:sreg_64 = S_AND_B64 %20, %19, implicit-def dead $scc
$exec = S_MOV_B64_term %21
SI_MASK_BRANCH %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
successors: %bb.4
undef %22.sub0:sreg_128 = COPY %8
%22.sub1:sreg_128 = COPY %8
%22.sub2:sreg_128 = COPY %8
%23:sreg_128 = COPY %22
%24:vreg_128 = COPY %23
%10:vreg_128 = COPY %24
bb.4:
successors: %bb.5
$exec = S_OR_B64 $exec, %20, implicit-def $scc
bb.5:
successors: %bb.7, %bb.6
S_CBRANCH_SCC0 %bb.7, implicit undef $scc
bb.6:
successors: %bb.9
%12:vreg_128 = COPY %10
S_BRANCH %bb.9
bb.7:
successors: %bb.8, %bb.10
%25:vgpr_32 = V_AND_B32_e32 target-flags(amdgpu-gotprel32-hi) 1, %10.sub2, implicit $exec
%26:sreg_64 = V_CMP_EQ_U32_e64 1, %25, implicit $exec
%27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%28:vreg_1 = COPY %27
%29:sreg_64 = COPY $exec, implicit-def $exec
%30:sreg_64 = S_AND_B64 %29, %26, implicit-def dead $scc
$exec = S_MOV_B64_term %30
SI_MASK_BRANCH %bb.10, implicit $exec
S_BRANCH %bb.8
bb.8:
successors: %bb.10
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
%35:vgpr_32 = V_CNDMASK_B32_e64 0, -1, %34, implicit $exec
%28:vreg_1 = COPY %35
S_BRANCH %bb.10
bb.9:
successors: %bb.11
S_BRANCH %bb.11
bb.10:
successors: %bb.9
$exec = S_OR_B64 $exec, %29, implicit-def $scc
%36:vreg_1 = COPY %28
%37:sreg_64_xexec = V_CMP_NE_U32_e64 0, %36, implicit $exec
%38:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %37, implicit $exec
%39:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
undef %40.sub0:vreg_128 = COPY %39
%40.sub1:vreg_128 = COPY %39
%40.sub2:vreg_128 = COPY %38
%41:vreg_128 = COPY %40
%12:vreg_128 = COPY %41
S_BRANCH %bb.9
bb.11:
successors: %bb.2, %bb.1
%42:vgpr_32 = V_ADD_I32_e32 32, %9, implicit-def dead $vcc, implicit $exec
V_CMP_EQ_U32_e32 0, %42, implicit-def $vcc, implicit $exec
%43:vgpr_32 = COPY %42
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
%44:vreg_128 = COPY %12
%9:vgpr_32 = COPY %43
%10:vreg_128 = COPY %44
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
S_BRANCH %bb.2
...
Loading

0 comments on commit 4581f37

Please sign in to comment.