1,005 changes: 527 additions & 478 deletions llvm/lib/CodeGen/InlineSpiller.cpp

Large diffs are not rendered by default.

73 changes: 50 additions & 23 deletions llvm/lib/CodeGen/LiveRangeEdit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
unsigned Original = VRM->getOriginal(getReg());
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;
checkRematerializable(VNI, DefMI, aa);
checkRematerializable(OrigVNI, DefMI, aa);
}
ScannedRemattable = true;
}
Expand Down Expand Up @@ -113,24 +116,18 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
return true;
}

bool LiveRangeEdit::canRematerializeAt(Remat &RM,
SlotIndex UseIdx,
bool cheapAsAMove) {
bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
SlotIndex UseIdx, bool cheapAsAMove) {
assert(ScannedRemattable && "Call anyRematerializable first");

// Use scanRemattable info.
if (!Remattable.count(RM.ParentVNI))
if (!Remattable.count(OrigVNI))
return false;

// No defining instruction provided.
SlotIndex DefIdx;
if (RM.OrigMI)
DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
else {
DefIdx = RM.ParentVNI->def;
RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
assert(RM.OrigMI && "No defining instruction for remattable value");
}
assert(RM.OrigMI && "No defining instruction for remattable value");
DefIdx = LIS.getInstructionIndex(*RM.OrigMI);

// If only cheap remats were requested, bail out early.
if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI))
Expand All @@ -151,10 +148,12 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
// DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
// to false anyway in case the isDead flag of RM.OrigMI's dest register
// is true.
(*--MI).getOperand(0).setIsDead(false);
Rematted.insert(RM.ParentVNI);
return LIS.getSlotIndexes()
->insertMachineInstrInMaps(*--MI, Late)
.getRegSlot();
return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
}

void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
Expand Down Expand Up @@ -261,6 +260,15 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
bool ReadsPhysRegs = false;
bool isOrigDef = false;
unsigned Dest;
if (VRM && MI->getOperand(0).isReg()) {
Dest = MI->getOperand(0).getReg();
unsigned Original = VRM->getOriginal(Dest);
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
}

// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
Expand Down Expand Up @@ -314,11 +322,26 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
}
DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
if (TheDelegate)
TheDelegate->LRE_WillEraseInstruction(MI);
LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
++NumDCEDeleted;
// If the dest of MI is an original reg, don't delete the inst. Replace
// the dest with a new reg, keep the inst for remat of other siblings.
// The inst is saved in LiveRangeEdit::DeadRemats and will be deleted
// after all the allocations of the func are done.
if (isOrigDef) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
pop_back();
markDeadRemat(MI);
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
MI->getOperand(0).setIsDead(true);
} else {
if (TheDelegate)
TheDelegate->LRE_WillEraseInstruction(MI);
LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
++NumDCEDeleted;
}
}

// Erase any virtregs that are now empty and unused. There may be <undef>
Expand All @@ -332,8 +355,9 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
}
}

void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
ArrayRef<unsigned> RegsBeingSpilled) {
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
ArrayRef<unsigned> RegsBeingSpilled,
bool NoSplit) {
ToShrinkSet ToShrink;

for (;;) {
Expand All @@ -355,6 +379,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (!LIS.shrinkToUses(LI, &Dead))
continue;

if (NoSplit)
continue;

// Don't create new intervals for a register being spilled.
// The new intervals would have to be spilled anyway so its not worth it.
// Also they currently aren't spilled so creating them and not spilling
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/RegAllocBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,12 @@ void RegAllocBase::allocatePhysRegs() {
}
}
}

void RegAllocBase::postOptimization() {
spiller().postOptimization();
for (auto DeadInst : DeadRemats) {
LIS->RemoveMachineInstrFromMaps(*DeadInst);
DeadInst->eraseFromParent();
}
DeadRemats.clear();
}
10 changes: 10 additions & 0 deletions llvm/lib/CodeGen/RegAllocBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ class RegAllocBase {
LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;

/// Inst which is a def of an original reg and whose defs are already all
/// dead after remat is saved in DeadRemats. The deletion of such inst is
/// postponed till all the allocations are done, so its remat expr is
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;

RegAllocBase()
: TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}

Expand All @@ -77,6 +83,10 @@ class RegAllocBase {
// physical register assignments.
void allocatePhysRegs();

// Include spiller post optimization and removing dead defs left because of
// rematerialization.
virtual void postOptimization();

// Get a temporary reference to a Spiller instance.
virtual Spiller &spiller() = 0;

Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/CodeGen/RegAllocBasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Matrix->unassign(Spill);

// Spill the extracted interval.
LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
spiller().spill(LRE);
}
return true;
Expand Down Expand Up @@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
spiller().spill(LRE);

// The live virtual register requesting allocation was spilled, so tell
Expand All @@ -283,6 +283,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));

allocatePhysRegs();
postOptimization();

// Diagnostic output before rewriting
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
Expand Down
31 changes: 17 additions & 14 deletions llvm/lib/CodeGen/RegAllocGreedy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
Expand All @@ -33,6 +32,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
Expand All @@ -44,6 +44,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>

Expand All @@ -55,14 +56,14 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode>
SplitSpillMode("split-spill-mode", cl::Hidden,
cl::desc("Spill mode for splitting live ranges"),
cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
clEnumValEnd),
cl::init(SplitEditor::SM_Partition));
static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
"split-spill-mode", cl::Hidden,
cl::desc("Spill mode for splitting live ranges"),
cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
clEnumValEnd),
cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
Expand Down Expand Up @@ -1465,7 +1466,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
SmallVectorImpl<unsigned> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
// Prepare split editor.
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);

// Assign all edge bundles to the preferred candidate, or NoCand.
Expand Down Expand Up @@ -1513,7 +1514,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
Expand Down Expand Up @@ -1585,7 +1586,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,

// Always enable split spill mode, since we're effectively spilling to a
// register.
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitEditor::SM_Size);

ArrayRef<SlotIndex> Uses = SA->getUseSlots();
Expand Down Expand Up @@ -1908,7 +1909,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");

LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit);

SE->openIntv();
Expand Down Expand Up @@ -2551,7 +2552,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
NewVRegs.push_back(VirtReg.reg);
} else {
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

Expand Down Expand Up @@ -2609,6 +2610,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {

allocatePhysRegs();
tryHintsRecoloring();
postOptimization();

releaseMemory();
return true;
}
21 changes: 20 additions & 1 deletion llvm/lib/CodeGen/RegAllocPBQP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ class RegAllocPBQP : public MachineFunctionPass {

RegSet VRegsToAlloc, EmptyIntervalVRegs;

/// Inst which is a def of an original reg and whose defs are already all
/// dead after remat is saved in DeadRemats. The deletion of such inst is
/// postponed till all the allocations are done, so its remat expr is
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;

/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);

Expand All @@ -146,6 +152,7 @@ class RegAllocPBQP : public MachineFunctionPass {
void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM) const;

void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
};

char RegAllocPBQP::ID = 0;
Expand Down Expand Up @@ -631,7 +638,8 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
VirtRegMap &VRM, Spiller &VRegSpiller) {

VRegsToAlloc.erase(VReg);
LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
nullptr, &DeadRemats);
VRegSpiller.spill(LRE);

const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
Expand Down Expand Up @@ -713,6 +721,16 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
}
}

void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
VRegSpiller.postOptimization();
/// Remove dead defs because of rematerialization.
for (auto DeadInst : DeadRemats) {
LIS.RemoveMachineInstrFromMaps(*DeadInst);
DeadInst->eraseFromParent();
}
DeadRemats.clear();
}

static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
unsigned NumInstr) {
// All intervals have a spill weight that is mostly proportional to the number
Expand Down Expand Up @@ -798,6 +816,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {

// Finalise allocation, allocate empty ranges.
finalizeAlloc(MF, LIS, VRM);
postOptimization(*VRegSpiller, LIS);
VRegsToAlloc.clear();
EmptyIntervalVRegs.clear();

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/Spiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace llvm {
class MachineFunction;
class MachineFunctionPass;
class VirtRegMap;
class LiveIntervals;

/// Spiller interface.
///
Expand All @@ -28,15 +29,14 @@ namespace llvm {

/// spill - Spill the LRE.getParent() live interval.
virtual void spill(LiveRangeEdit &LRE) = 0;

virtual void postOptimization(){};
};

/// Create and return a spiller that will insert spill code directly instead
/// of deferring though VirtRegMap.
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);

}

#endif
112 changes: 102 additions & 10 deletions llvm/lib/CodeGen/SplitKit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
Expand Down Expand Up @@ -430,8 +431,13 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
bool Late = RegIdx != 0;

// Attempt cheap-as-a-copy rematerialization.
unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
LiveRangeEdit::Remat RM(ParentVNI);
if (Edit->canRematerializeAt(RM, UseIdx, true)) {
RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);

if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
Expand Down Expand Up @@ -716,7 +722,62 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
}
}

void SplitEditor::hoistCopiesForSize() {
void SplitEditor::computeRedundantBackCopies(
DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
LiveInterval *Parent = &Edit->getParent();
SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
SmallPtrSet<VNInfo *, 8> DominatedVNIs;

// Aggregate VNIs having the same value as ParentVNI.
for (VNInfo *VNI : LI->valnos) {
if (VNI->isUnused())
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
EqualVNs[ParentVNI->id].insert(VNI);
}

// For VNI aggregation of each ParentVNI, collect dominated, i.e.,
// redundant VNIs to BackCopies.
for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
VNInfo *ParentVNI = Parent->getValNumInfo(i);
if (!NotToHoistSet.count(ParentVNI->id))
continue;
SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
SmallPtrSetIterator<VNInfo *> It2 = It1;
for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
It2 = It1;
for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
continue;

MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
if (MBB1 == MBB2) {
DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
} else if (MDT.dominates(MBB1, MBB2)) {
DominatedVNIs.insert(*It2);
} else if (MDT.dominates(MBB2, MBB1)) {
DominatedVNIs.insert(*It1);
}
}
}
if (!DominatedVNIs.empty()) {
forceRecompute(0, ParentVNI);
for (auto VNI : DominatedVNIs) {
BackCopies.push_back(VNI);
}
DominatedVNIs.clear();
}
}
}

/// For SM_Size mode, find a common dominator for all the back-copies for
/// the same ParentVNI and hoist the backcopies to the dominator BB.
/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
/// to do the hoisting, simply remove the dominated backcopies for the same
/// ParentVNI.
void SplitEditor::hoistCopies() {
// Get the complement interval, always RegIdx 0.
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
LiveInterval *Parent = &Edit->getParent();
Expand All @@ -725,6 +786,11 @@ void SplitEditor::hoistCopiesForSize() {
// indexed by ParentVNI->id.
typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
// The total cost of all the back-copies for each ParentVNI.
SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
// The ParentVNI->id set for which hoisting back-copies are not beneficial
// for Speed.
DenseSet<unsigned> NotToHoistSet;

// Find the nearest common dominator for parent values with multiple
// back-copies. If a single back-copy dominates, put it in DomPair.second.
Expand All @@ -740,6 +806,7 @@ void SplitEditor::hoistCopiesForSize() {
continue;

MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);

DomPair &Dom = NearestDom[ParentVNI->id];

// Keep directly defined parent values. This is either a PHI or an
Expand Down Expand Up @@ -774,6 +841,7 @@ void SplitEditor::hoistCopiesForSize() {
else if (Near != Dom.first)
// None dominate. Hoist to common dominator, need new def.
Dom = DomPair(Near, SlotIndex());
Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
}

DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
Expand All @@ -792,6 +860,11 @@ void SplitEditor::hoistCopiesForSize() {
MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
// Get a less loopy dominator than Dom.first.
Dom.first = findShallowDominator(Dom.first, DefMBB);
if (SpillMode == SM_Speed &&
MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
NotToHoistSet.insert(ParentVNI->id);
continue;
}
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
Expand All @@ -806,11 +879,18 @@ void SplitEditor::hoistCopiesForSize() {
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
if (!Dom.first || Dom.second == VNI->def)
if (!Dom.first || Dom.second == VNI->def ||
NotToHoistSet.count(ParentVNI->id))
continue;
BackCopies.push_back(VNI);
forceRecompute(0, ParentVNI);
}

// If it is not beneficial to hoist all the BackCopies, simply remove
// redundant BackCopies in speed mode.
if (SpillMode == SM_Speed && !NotToHoistSet.empty())
computeRedundantBackCopies(NotToHoistSet, BackCopies);

removeBackCopies(BackCopies);
}

Expand Down Expand Up @@ -925,12 +1005,22 @@ bool SplitEditor::transferValues() {
}

void SplitEditor::extendPHIKillRanges() {
// Extend live ranges to be live-out for successor PHI values.
// Extend live ranges to be live-out for successor PHI values.
for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
continue;
unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));

// Check whether PHI is dead.
const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def);
assert(Segment != nullptr && "Missing segment for VNI");
if (Segment->end == PHIVNI->def.getDeadSlot()) {
// This is a dead PHI. Remove it.
LR.removeSegment(*Segment, true);
continue;
}

LiveRangeCalc &LRC = getLRCalc(RegIdx);
MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
Expand Down Expand Up @@ -1004,6 +1094,8 @@ void SplitEditor::deleteRematVictims() {
// Dead defs end at the dead slot.
if (S.end != S.valno->def.getDeadSlot())
continue;
if (S.valno->isPHIDef())
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
MI->addRegisterDead(LI->reg, &TRI);
Expand Down Expand Up @@ -1048,22 +1140,22 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
// Leave all back-copies as is.
break;
case SM_Size:
hoistCopiesForSize();
break;
case SM_Speed:
llvm_unreachable("Spill mode 'speed' not implemented yet");
// hoistCopies will behave differently between size and speed.
hoistCopies();
}

// Transfer the simply mapped values, check if any are skipped.
bool Skipped = transferValues();

// Rewrite virtual registers, possibly extending ranges.
rewriteAssigned(Skipped);

if (Skipped)
extendPHIKillRanges();
else
++NumSimple;

// Rewrite virtual registers, possibly extending ranges.
rewriteAssigned(Skipped);

// Delete defs that were rematted everywhere.
if (Skipped)
deleteRematVictims();
Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/CodeGen/SplitKit.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"

Expand Down Expand Up @@ -329,9 +330,14 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor {
MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB);

/// hoistCopiesForSize - Hoist back-copies to the complement interval in a
/// way that minimizes code size. This implements the SM_Size spill mode.
void hoistCopiesForSize();
/// Find out all the backCopies dominated by others.
void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
SmallVectorImpl<VNInfo *> &BackCopies);

/// Hoist back-copies to the complement interval. It tries to hoist all
/// the back-copies to one BB if it is beneficial, or else simply remove
/// redundent backcopies dominated by others.
void hoistCopies();

/// transferValues - Transfer values to the new ranges.
/// Return true if any ranges were skipped.
Expand Down
514 changes: 0 additions & 514 deletions llvm/test/CodeGen/AArch64/aarch64-deferred-spilling.ll

This file was deleted.

162 changes: 162 additions & 0 deletions llvm/test/CodeGen/ARM/interval-update-remat.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
; RUN: llc -verify-regalloc < %s
; PR27275: When enabling remat for vreg defined by PHIs, make sure the update
; of the live range removes dead phi. Otherwise, we may end up with PHIs with
; incorrect operands and that will trigger assertions or verifier failures
; in later passes.

target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7-apple-ios9.0.0"

%class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326 = type { %class.MockTransportClientSocketPool.0.225.275.1300.2325, i32 }
%class.MockTransportClientSocketPool.0.225.275.1300.2325 = type { i8 }
%class.StaticSocketDataProvider.6.231.281.1306.2331 = type { i8, %struct.MockConnect.5.230.280.1305.2330 }
%struct.MockConnect.5.230.280.1305.2330 = type { %class.IPEndPoint.4.229.279.1304.2329 }
%class.IPEndPoint.4.229.279.1304.2329 = type { %class.IPAddress.3.228.278.1303.2328 }
%class.IPAddress.3.228.278.1303.2328 = type { %"class.(anonymous namespace)::vector.2.227.277.1302.2327" }
%"class.(anonymous namespace)::vector.2.227.277.1302.2327" = type { i8 }
%class.TestCompletionCallback.9.234.284.1309.2334 = type { %class.TestCompletionCallbackTemplate.8.233.283.1308.2333, i32 }
%class.TestCompletionCallbackTemplate.8.233.283.1308.2333 = type { i32 }
%class.AssertionResult.24.249.299.1324.2349 = type { i8, %class.scoped_ptr.23.248.298.1323.2348 }
%class.scoped_ptr.23.248.298.1323.2348 = type { %class.Trans_NS___1_basic_string.18.243.293.1318.2343* }
%class.Trans_NS___1_basic_string.18.243.293.1318.2343 = type { %class.Trans_NS___1___libcpp_compressed_pair_imp.17.242.292.1317.2342 }
%class.Trans_NS___1___libcpp_compressed_pair_imp.17.242.292.1317.2342 = type { %"struct.Trans_NS___1_basic_string<char, int, int>::__rep.16.241.291.1316.2341" }
%"struct.Trans_NS___1_basic_string<char, int, int>::__rep.16.241.291.1316.2341" = type { %"struct.Trans_NS___1_basic_string<char, int, int>::__long.15.240.290.1315.2340" }
%"struct.Trans_NS___1_basic_string<char, int, int>::__long.15.240.290.1315.2340" = type { i64, i32 }
%class.AssertHelper.10.235.285.1310.2335 = type { i8 }
%class.Message.13.238.288.1313.2338 = type { %class.scoped_ptr.0.12.237.287.1312.2337 }
%class.scoped_ptr.0.12.237.287.1312.2337 = type { %"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336"* }
%"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336" = type { i8 }
%class.scoped_refptr.19.244.294.1319.2344 = type { i8 }
%class.BoundNetLog.20.245.295.1320.2345 = type { i32 }
%struct.MockReadWrite.7.232.282.1307.2332 = type { i32 }
%"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347" = type { i8 }
%class.ClientSocketHandle.14.239.289.1314.2339 = type { i8 }
%"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346" = type { i8 }

@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1

define void @_ZN53SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test6m_fn10Ev(%class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326* %this) align 2 {
entry:
%socket_data = alloca %class.StaticSocketDataProvider.6.231.281.1306.2331, align 1
%agg.tmp = alloca %struct.MockConnect.5.230.280.1305.2330, align 1
%callback = alloca %class.TestCompletionCallback.9.234.284.1309.2334, align 4
%gtest_ar = alloca %class.AssertionResult.24.249.299.1324.2349, align 4
%temp.lvalue = alloca %class.AssertHelper.10.235.285.1310.2335, align 1
%agg.tmp10 = alloca %class.Message.13.238.288.1313.2338, align 4
%ref.tmp = alloca %class.Trans_NS___1_basic_string.18.243.293.1318.2343, align 4
%agg.tmp16 = alloca %class.scoped_refptr.19.244.294.1319.2344, align 1
%agg.tmp18 = alloca %class.BoundNetLog.20.245.295.1320.2345, align 4
%call2 = call %class.StaticSocketDataProvider.6.231.281.1306.2331* @_ZN24StaticSocketDataProviderC1EP13MockReadWritejS1_j(%class.StaticSocketDataProvider.6.231.281.1306.2331* nonnull %socket_data, %struct.MockReadWrite.7.232.282.1307.2332* undef, i32 1, %struct.MockReadWrite.7.232.282.1307.2332* null, i32 0)
%call3 = call %struct.MockConnect.5.230.280.1305.2330* @_ZN11MockConnectC1Ev(%struct.MockConnect.5.230.280.1305.2330* nonnull %agg.tmp)
call void @_ZN24StaticSocketDataProvider5m_fn8E11MockConnect(%class.StaticSocketDataProvider.6.231.281.1306.2331* nonnull %socket_data, %struct.MockConnect.5.230.280.1305.2330* nonnull %agg.tmp)
%call5 = call %class.TestCompletionCallback.9.234.284.1309.2334* @_ZN22TestCompletionCallbackC1Ev(%class.TestCompletionCallback.9.234.284.1309.2334* nonnull %callback)
%transport_socket_pool_ = getelementptr inbounds %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326, %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326* %this, i32 0, i32 0
%call6 = call i32 @_ZN29MockTransportClientSocketPool5m_fn9Ev(%class.MockTransportClientSocketPool.0.225.275.1300.2325* %transport_socket_pool_)
call void @_Z11CmpHelperEQPcS_xx(%class.AssertionResult.24.249.299.1324.2349* nonnull sret %gtest_ar, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i64 0, i64 undef)
%tmp = load i8, i8* undef, align 4
%tobool.i = icmp eq i8 %tmp, 0
br i1 %tobool.i, label %if.else, label %if.end

if.else: ; preds = %entry
br i1 undef, label %_ZN15AssertionResult5m_fn6Ev.exit, label %cond.true.i

cond.true.i: ; preds = %if.else
%call4.i = call i8* @_ZN25Trans_NS___1_basic_stringIciiE5m_fn1Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull undef)
br label %_ZN15AssertionResult5m_fn6Ev.exit

_ZN15AssertionResult5m_fn6Ev.exit: ; preds = %cond.true.i, %if.else
%cond.i = phi i8* [ %call4.i, %cond.true.i ], [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), %if.else ]
%call9 = call %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperC1EPKc(%class.AssertHelper.10.235.285.1310.2335* nonnull %temp.lvalue, i8* %cond.i)
%call11 = call %class.Message.13.238.288.1313.2338* @_ZN7MessageC1Ev(%class.Message.13.238.288.1313.2338* nonnull %agg.tmp10)
call void @_ZN12AssertHelperaSE7Message(%class.AssertHelper.10.235.285.1310.2335* nonnull %temp.lvalue, %class.Message.13.238.288.1313.2338* nonnull %agg.tmp10)
%call.i.i.i.i27 = call zeroext i1 @_Z6IsTruev()
%brmerge = or i1 false, undef
br i1 %brmerge, label %_ZN7MessageD1Ev.exit33, label %delete.notnull.i.i.i.i32

delete.notnull.i.i.i.i32: ; preds = %_ZN15AssertionResult5m_fn6Ev.exit
%call.i.i.i.i.i.i31 = call %"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* @_ZN12_GLOBAL__N_114basic_iostreamD2Ev(%"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* undef)
call void @_ZdlPv(i8* undef)
br label %_ZN7MessageD1Ev.exit33

_ZN7MessageD1Ev.exit33: ; preds = %delete.notnull.i.i.i.i32, %_ZN15AssertionResult5m_fn6Ev.exit
%call13 = call %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperD1Ev(%class.AssertHelper.10.235.285.1310.2335* nonnull %temp.lvalue)
br label %if.end

if.end: ; preds = %_ZN7MessageD1Ev.exit33, %entry
%message_.i.i = getelementptr inbounds %class.AssertionResult.24.249.299.1324.2349, %class.AssertionResult.24.249.299.1324.2349* %gtest_ar, i32 0, i32 1
%call.i.i.i = call %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* %message_.i.i)
call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 12, i32 4, i1 false)
call void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull %ref.tmp)
call void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* nonnull sret %agg.tmp16)
%callback_.i = getelementptr inbounds %class.TestCompletionCallback.9.234.284.1309.2334, %class.TestCompletionCallback.9.234.284.1309.2334* %callback, i32 0, i32 1
%pool_ = getelementptr inbounds %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326, %class.SOCKSClientSocketPoolTest_AsyncSOCKSConnectError_Test.1.226.276.1301.2326* %this, i32 0, i32 1
%tmp1 = getelementptr inbounds %class.BoundNetLog.20.245.295.1320.2345, %class.BoundNetLog.20.245.295.1320.2345* %agg.tmp18, i32 0, i32 0
store i32 0, i32* %tmp1, align 4
call void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIciiE13scoped_refptr15RequestPriorityN16ClientSocketPool13RespectLimitsERiT_11BoundNetLog(%class.ClientSocketHandle.14.239.289.1314.2339* nonnull undef, %class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull dereferenceable(12) %ref.tmp, %class.scoped_refptr.19.244.294.1319.2344* nonnull %agg.tmp16, i32 0, i32 1, i32* nonnull dereferenceable(4) %callback_.i, i32* %pool_, %class.BoundNetLog.20.245.295.1320.2345* nonnull %agg.tmp18)
%call19 = call %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* nonnull %agg.tmp18)
call void @_Z11CmpHelperEQPcS_xx(%class.AssertionResult.24.249.299.1324.2349* nonnull sret undef, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i64 -1, i64 0)
br i1 undef, label %if.then.i.i.i.i, label %_ZN7MessageD1Ev.exit

if.then.i.i.i.i: ; preds = %if.end
%tmp2 = load %"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336"*, %"class.(anonymous namespace)::basic_stringstream.11.236.286.1311.2336"** undef, align 4
br label %_ZN7MessageD1Ev.exit

_ZN7MessageD1Ev.exit: ; preds = %if.then.i.i.i.i, %if.end
%connect_.i.i = getelementptr inbounds %class.StaticSocketDataProvider.6.231.281.1306.2331, %class.StaticSocketDataProvider.6.231.281.1306.2331* %socket_data, i32 0, i32 1
%tmp3 = bitcast %struct.MockConnect.5.230.280.1305.2330* %connect_.i.i to %"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"*
%call.i.i.i.i.i.i.i.i.i.i = call %"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* @_ZN12_GLOBAL__N_113__vector_baseD2Ev(%"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* %tmp3)
ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #0

declare %class.StaticSocketDataProvider.6.231.281.1306.2331* @_ZN24StaticSocketDataProviderC1EP13MockReadWritejS1_j(%class.StaticSocketDataProvider.6.231.281.1306.2331* returned, %struct.MockReadWrite.7.232.282.1307.2332*, i32, %struct.MockReadWrite.7.232.282.1307.2332*, i32) unnamed_addr

declare void @_ZN24StaticSocketDataProvider5m_fn8E11MockConnect(%class.StaticSocketDataProvider.6.231.281.1306.2331*, %struct.MockConnect.5.230.280.1305.2330*)

declare %struct.MockConnect.5.230.280.1305.2330* @_ZN11MockConnectC1Ev(%struct.MockConnect.5.230.280.1305.2330* returned) unnamed_addr

declare %class.TestCompletionCallback.9.234.284.1309.2334* @_ZN22TestCompletionCallbackC1Ev(%class.TestCompletionCallback.9.234.284.1309.2334* returned) unnamed_addr

declare i32 @_ZN29MockTransportClientSocketPool5m_fn9Ev(%class.MockTransportClientSocketPool.0.225.275.1300.2325*)

declare %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperC1EPKc(%class.AssertHelper.10.235.285.1310.2335* returned, i8*) unnamed_addr

declare void @_ZN12AssertHelperaSE7Message(%class.AssertHelper.10.235.285.1310.2335*, %class.Message.13.238.288.1313.2338*)

declare %class.Message.13.238.288.1313.2338* @_ZN7MessageC1Ev(%class.Message.13.238.288.1313.2338* returned) unnamed_addr

declare %class.AssertHelper.10.235.285.1310.2335* @_ZN12AssertHelperD1Ev(%class.AssertHelper.10.235.285.1310.2335* returned) unnamed_addr

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #0

declare void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIciiE13scoped_refptr15RequestPriorityN16ClientSocketPool13RespectLimitsERiT_11BoundNetLog(%class.ClientSocketHandle.14.239.289.1314.2339*, %class.Trans_NS___1_basic_string.18.243.293.1318.2343* dereferenceable(12), %class.scoped_refptr.19.244.294.1319.2344*, i32, i32, i32* dereferenceable(4), i32*, %class.BoundNetLog.20.245.295.1320.2345*)

declare void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* sret)

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0

declare %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* returned) unnamed_addr

declare %class.scoped_refptr.19.244.294.1319.2344* @_ZN13scoped_refptrD1Ev(%class.scoped_refptr.19.244.294.1319.2344* returned) unnamed_addr

declare %"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* @_ZN12_GLOBAL__N_113__vector_baseD2Ev(%"class.(anonymous namespace)::__vector_base.21.246.296.1321.2346"* returned) unnamed_addr

declare i8* @_ZN25Trans_NS___1_basic_stringIciiE5m_fn1Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343*)

declare zeroext i1 @_Z6IsTruev()

declare void @_ZdlPv(i8*)

declare %"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* @_ZN12_GLOBAL__N_114basic_iostreamD2Ev(%"class.(anonymous namespace)::basic_iostream.22.247.297.1322.2347"* returned) unnamed_addr

declare %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* readonly returned) unnamed_addr align 2

declare void @_Z11CmpHelperEQPcS_xx(%class.AssertionResult.24.249.299.1324.2349* sret, i8*, i8*, i64, i64)

declare void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343*)

attributes #0 = { argmemonly nounwind }
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fp128-compare.ll
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ entry:
%cond = select i1 %cmp, fp128 %x, fp128 %y
ret fp128 %cond
; CHECK-LABEL: TestMax:
; CHECK: movaps %xmm1
; CHECK: movaps %xmm0
; CHECK: movaps %xmm1
; CHECK: callq __gttf2
; CHECK: movaps {{.*}}, %xmm0
; CHECK: testl %eax, %eax
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/X86/hoist-spill.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; RUN: llc < %s | FileCheck %s

; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}'
; Check no spills to the same stack slot after hoisting.
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = external global i32*, align 8
@b = external global i32, align 4
@d = external global i32*, align 8

; Function Attrs: norecurse noreturn nounwind uwtable
define void @fn1(i32 %p1) {
entry:
%tmp = load i32*, i32** @d, align 8
%tmp1 = load i32*, i32** @a, align 8
%tmp2 = sext i32 %p1 to i64
br label %for.cond

for.cond: ; preds = %for.inc14, %entry
%indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ]
%indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ]
%c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ]
%k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ]
%tmp3 = icmp sgt i32 undef, 0
%smax52 = select i1 %tmp3, i32 undef, i32 0
%tmp4 = zext i32 %smax52 to i64
%tmp5 = icmp sgt i64 undef, %tmp4
%smax53 = select i1 %tmp5, i64 undef, i64 %tmp4
%tmp6 = add nsw i64 %smax53, 1
%tmp7 = sub nsw i64 %tmp6, %tmp4
%tmp8 = add nsw i64 %tmp7, -8
%tmp9 = sub i32 undef, %indvar
%tmp10 = icmp sgt i64 %tmp2, 0
%smax40 = select i1 %tmp10, i64 %tmp2, i64 0
%scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40
%indvars.iv30 = add i32 %indvars.iv30.in, -1
%tmp11 = icmp sgt i32 %indvars.iv30, 0
%smax = select i1 %tmp11, i32 %indvars.iv30, i32 0
%tmp12 = zext i32 %smax to i64
%sub = sub nsw i32 %p1, %c.0
%cmp = icmp sgt i32 %sub, 0
%sub. = select i1 %cmp, i32 %sub, i32 0
%cmp326 = icmp sgt i32 %k.0, %p1
br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader

for.body.preheader: ; preds = %for.cond
br label %for.body

for.cond4.preheader: ; preds = %for.body, %for.cond
%k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ]
%cmp528 = icmp sgt i32 %sub., %p1
br i1 %cmp528, label %for.inc14, label %for.body6.preheader

for.body6.preheader: ; preds = %for.cond4.preheader
br i1 undef, label %for.body6, label %min.iters.checked

min.iters.checked: ; preds = %for.body6.preheader
br i1 undef, label %for.body6, label %vector.memcheck

vector.memcheck: ; preds = %min.iters.checked
%bound1 = icmp ule i32* undef, %scevgep41
%memcheck.conflict = and i1 undef, %bound1
br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader

vector.body.preheader: ; preds = %vector.memcheck
%lcmp.mod = icmp eq i64 undef, 0
br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol

vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader
%prol.iter.cmp = icmp eq i64 undef, 0
br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol

vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader
%tmp13 = icmp ult i64 %tmp8, 24
br i1 %tmp13, label %middle.block, label %vector.body

vector.body: ; preds = %vector.body, %vector.body.preheader.split
%index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ]
%index.next = add i64 %index, 8
%offset.idx.1 = add i64 %tmp12, %index.next
%tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1
%tmp15 = bitcast i32* %tmp14 to <4 x i32>*
%wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4
%tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1
%tmp17 = bitcast i32* %tmp16 to <4 x i32>*
store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4
%index.next.3 = add i64 %index, 32
br i1 undef, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body, %vector.body.preheader.split
br i1 undef, label %for.inc14, label %for.body6

for.body: ; preds = %for.body, %for.body.preheader
%k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ]
%add = add nsw i32 %k.127, 1
%tmp18 = load i32, i32* undef, align 4
store i32 %tmp18, i32* @b, align 4
br i1 undef, label %for.body, label %for.cond4.preheader

for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader
%indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ]
%arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32
%tmp19 = load i32, i32* %arrayidx8, align 4
%arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32
store i32 %tmp19, i32* %arrayidx10, align 4
%cmp5 = icmp slt i64 %indvars.iv32, undef
br i1 %cmp5, label %for.body6, label %for.inc14

for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader
%inc15 = add nuw nsw i32 %c.0, 1
%indvar.next = add i32 %indvar, 1
br label %for.cond
}
161 changes: 161 additions & 0 deletions llvm/test/CodeGen/X86/interval-update-remat.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
; RUN: llc -verify-regalloc < %s
; PR27275: When enabling remat for vreg defined by PHIs, make sure the update
; of the live range removes dead phi. Otherwise, we may end up with PHIs with
; incorrect operands and that will trigger assertions or verifier failures
; in later passes.

target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

@b = external global i64, align 8
@d = external global i32, align 4
@e = external global i64, align 8
@h = external global i16, align 2
@a = external global i8, align 1
@g = external global i64, align 8
@j = external global i32, align 4
@f = external global i16, align 2
@.str = external unnamed_addr constant [12 x i8], align 1

define void @fn1() {
entry:
%tmp = load i64, i64* @b, align 8
%or = or i64 0, 3299921317
%and = and i64 %or, %tmp
%tmp1 = load i32, i32* @d, align 4
br i1 undef, label %lor.rhs, label %lor.end

lor.rhs: ; preds = %entry
%tobool3 = icmp ne i8 undef, 0
br label %lor.end

lor.end: ; preds = %lor.rhs, %entry
%lor.ext = zext i1 undef to i32
%tmp2 = load i64, i64* @e, align 8
br i1 undef, label %lor.rhs5, label %lor.end7

lor.rhs5: ; preds = %lor.end
br label %lor.end7

lor.end7: ; preds = %lor.rhs5, %lor.end
%tmp3 = phi i1 [ true, %lor.end ], [ false, %lor.rhs5 ]
%neg13 = xor i64 %tmp, -1
%conv25 = zext i1 %tmp3 to i32
%tobool46 = icmp eq i64 %tmp, 0
%.pre = load i16, i16* @h, align 2
%tobool10 = icmp eq i16 %.pre, 0
%neg.us = xor i32 %tmp1, -1
%conv12.us = sext i32 %neg.us to i64
%tobool23.us = icmp eq i64 %tmp2, %and
%conv39.us = sext i32 %tmp1 to i64
br label %LABEL_mSmSDb

LABEL_mSmSDb.loopexit: ; preds = %lor.end32.us
%conv42.us.lcssa = phi i32 [ %conv42.us, %lor.end32.us ]
store i64 undef, i64* @g, align 8
br label %LABEL_mSmSDb

LABEL_mSmSDb: ; preds = %LABEL_mSmSDb.loopexit, %lor.end7
%tmp4 = phi i32 [ undef, %lor.end7 ], [ %conv42.us.lcssa, %LABEL_mSmSDb.loopexit ]
%tmp5 = phi i64 [ %tmp, %lor.end7 ], [ 0, %LABEL_mSmSDb.loopexit ]
br i1 %tobool10, label %LABEL_BRBRN.preheader, label %if.then

if.then: ; preds = %LABEL_mSmSDb
store i8 undef, i8* @a, align 1
br label %LABEL_BRBRN.preheader

LABEL_BRBRN.preheader: ; preds = %if.then, %LABEL_mSmSDb
%.pre63 = load i64, i64* @g, align 8
br i1 %tobool46, label %LABEL_BRBRN.us, label %LABEL_BRBRN.outer

LABEL_BRBRN.outer: ; preds = %if.then47, %LABEL_BRBRN.preheader
%.ph = phi i32 [ 0, %if.then47 ], [ %tmp4, %LABEL_BRBRN.preheader ]
%.ph64 = phi i32 [ %conv50, %if.then47 ], [ %tmp1, %LABEL_BRBRN.preheader ]
%.ph65 = phi i64 [ %tmp16, %if.then47 ], [ %.pre63, %LABEL_BRBRN.preheader ]
%.ph66 = phi i64 [ 0, %if.then47 ], [ %tmp2, %LABEL_BRBRN.preheader ]
%.ph67 = phi i64 [ %.pre56.pre, %if.then47 ], [ %tmp5, %LABEL_BRBRN.preheader ]
%neg = xor i32 %.ph64, -1
%conv12 = sext i32 %neg to i64
%tobool23 = icmp eq i64 %.ph66, %and
%tmp6 = load i32, i32* @j, align 4
%shr = lshr i32 %conv25, %tmp6
%conv39 = sext i32 %.ph64 to i64
br label %LABEL_BRBRN

LABEL_BRBRN.us: ; preds = %lor.end32.us, %LABEL_BRBRN.preheader
%tmp7 = phi i32 [ %conv42.us, %lor.end32.us ], [ %tmp4, %LABEL_BRBRN.preheader ]
%tmp8 = phi i64 [ undef, %lor.end32.us ], [ %.pre63, %LABEL_BRBRN.preheader ]
%tmp9 = phi i64 [ %tmp10, %lor.end32.us ], [ %tmp5, %LABEL_BRBRN.preheader ]
%mul.us = mul i64 %tmp8, %neg13
%mul14.us = mul i64 %mul.us, %conv12.us
%cmp.us = icmp sgt i64 %tmp2, %mul14.us
%conv16.us = zext i1 %cmp.us to i64
%xor.us = xor i64 %conv16.us, %tmp9
%rem18.us = urem i32 %lor.ext, %tmp7
%conv19.us = zext i32 %rem18.us to i64
br i1 %tobool23.us, label %lor.rhs24.us, label %lor.end32.us

lor.rhs24.us: ; preds = %LABEL_BRBRN.us
br label %lor.end32.us

lor.end32.us: ; preds = %lor.rhs24.us, %LABEL_BRBRN.us
%tmp10 = phi i64 [ -2, %LABEL_BRBRN.us ], [ -1, %lor.rhs24.us ]
%xor.us.not = xor i64 %xor.us, -1
%neg36.us = and i64 %conv19.us, %xor.us.not
%conv37.us = zext i32 %tmp7 to i64
%sub38.us = sub nsw i64 %neg36.us, %conv37.us
%mul40.us = mul nsw i64 %sub38.us, %conv39.us
%neg41.us = xor i64 %mul40.us, 4294967295
%conv42.us = trunc i64 %neg41.us to i32
%tobool43.us = icmp eq i8 undef, 0
br i1 %tobool43.us, label %LABEL_mSmSDb.loopexit, label %LABEL_BRBRN.us

LABEL_BRBRN: ; preds = %lor.end32, %LABEL_BRBRN.outer
%tmp11 = phi i32 [ %conv42, %lor.end32 ], [ %.ph, %LABEL_BRBRN.outer ]
%tmp12 = phi i64 [ %neg21, %lor.end32 ], [ %.ph65, %LABEL_BRBRN.outer ]
%tmp13 = phi i64 [ %conv35, %lor.end32 ], [ %.ph67, %LABEL_BRBRN.outer ]
%mul = mul i64 %tmp12, %neg13
%mul14 = mul i64 %mul, %conv12
%cmp = icmp sgt i64 %.ph66, %mul14
%conv16 = zext i1 %cmp to i64
%xor = xor i64 %conv16, %tmp13
%rem18 = urem i32 %lor.ext, %tmp11
%conv19 = zext i32 %rem18 to i64
%neg21 = or i64 %xor, undef
br i1 %tobool23, label %lor.rhs24, label %lor.end32

lor.rhs24: ; preds = %LABEL_BRBRN
%tmp14 = load volatile i16, i16* @f, align 2
%conv26 = sext i16 %tmp14 to i32
%and27 = and i32 %conv26, %shr
%conv28 = sext i32 %and27 to i64
%mul29 = mul nsw i64 %conv28, %tmp
%and30 = and i64 %mul29, %tmp13
%tobool31 = icmp ne i64 %and30, 0
br label %lor.end32

lor.end32: ; preds = %lor.rhs24, %LABEL_BRBRN
%tmp15 = phi i1 [ true, %LABEL_BRBRN ], [ %tobool31, %lor.rhs24 ]
%lor.ext33 = zext i1 %tmp15 to i32
%neg34 = xor i32 %lor.ext33, -1
%conv35 = sext i32 %neg34 to i64
%xor.not = xor i64 %xor, -1
%neg36 = and i64 %conv19, %xor.not
%conv37 = zext i32 %tmp11 to i64
%sub38 = sub nsw i64 %neg36, %conv37
%mul40 = mul nsw i64 %sub38, %conv39
%neg41 = xor i64 %mul40, 4294967295
%conv42 = trunc i64 %neg41 to i32
%tobool43 = icmp eq i8 undef, 0
br i1 %tobool43, label %if.then47, label %LABEL_BRBRN

if.then47: ; preds = %lor.end32
tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i64 %conv39)
%tmp16 = load i64, i64* @g, align 8
%neg49 = xor i64 %tmp16, 4294967295
%conv50 = trunc i64 %neg49 to i32
%.pre56.pre = load i64, i64* @b, align 8
br label %LABEL_BRBRN.outer
}

declare void @printf(i8* nocapture readonly, ...)
70 changes: 70 additions & 0 deletions llvm/test/CodeGen/X86/new-remat.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; RUN: llc -verify-regalloc < %s | FileCheck %s
; Check all spills are rematerialized.
; CHECK-NOT: Spill

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@b = common global double 0.000000e+00, align 8
@a = common global i32 0, align 4

; Function Attrs: nounwind uwtable
define i32 @uniform_testdata(i32 %p1) {
entry:
%cmp3 = icmp sgt i32 %p1, 0
br i1 %cmp3, label %for.body.preheader, label %for.end

for.body.preheader: ; preds = %entry
%tmp = add i32 %p1, -1
%xtraiter = and i32 %p1, 7
%lcmp.mod = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader

for.body.prol.preheader: ; preds = %for.body.preheader
br label %for.body.prol

for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader
%i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
%prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ]
%tmp1 = load double, double* @b, align 8
%call.prol = tail call double @pow(double %tmp1, double 2.500000e-01)
%inc.prol = add nuw nsw i32 %i.04.prol, 1
%prol.iter.sub = add i32 %prol.iter, -1
%prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0
br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol

for.body.preheader.split.loopexit: ; preds = %for.body.prol
%inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ]
br label %for.body.preheader.split

for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader
%i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ]
%tmp2 = icmp ult i32 %tmp, 7
br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split

for.body.preheader.split.split: ; preds = %for.body.preheader.split
br label %for.body

for.body: ; preds = %for.body, %for.body.preheader.split.split
%i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ]
%tmp3 = load double, double* @b, align 8
%call = tail call double @pow(double %tmp3, double 2.500000e-01)
%tmp4 = load double, double* @b, align 8
%call.1 = tail call double @pow(double %tmp4, double 2.500000e-01)
%inc.7 = add nsw i32 %i.04, 8
%exitcond.7 = icmp eq i32 %inc.7, %p1
br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body

for.end.loopexit.unr-lcssa: ; preds = %for.body
br label %for.end.loopexit

for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split
br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
%tmp5 = load i32, i32* @a, align 4
ret i32 %tmp5
}

; Function Attrs: nounwind
declare double @pow(double, double)
10 changes: 7 additions & 3 deletions llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s

; This testing case is reduced from 254.gap SyFgets function.
; We make sure a spill is not hoisted to a hotter outer loop.
; We make sure a spill is hoisted to a cold BB inside the hotter outer loop.

%struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] }
%struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 }
Expand Down Expand Up @@ -181,6 +181,10 @@ sw.bb474:
br i1 %cmp476, label %if.end517, label %do.body479.preheader

do.body479.preheader:
; CHECK: do.body479.preheader
; spill is hoisted here. Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold.
; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; CHECK: land.rhs485
%cmp4833314 = icmp eq i8 undef, 0
br i1 %cmp4833314, label %if.end517, label %land.rhs485

Expand All @@ -200,8 +204,8 @@ land.lhs.true490:

lor.rhs500:
; CHECK: lor.rhs500
; Make sure that we don't hoist the spill to outer loops.
; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; Make sure spill is hoisted to a cold preheader in outside loop.
; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp)
; CHECK: callq {{.*}}maskrune
%call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256)
br i1 undef, label %land.lhs.true504, label %do.body479.backedge
Expand Down