460 changes: 303 additions & 157 deletions llvm/lib/Transforms/Scalar/SROA.cpp
@@ -237,6 +237,273 @@ class AllocaSlices {
const_iterator end() const { return Slices.end(); }
/// @}

// Forward declare an iterator to befriend it.
class partition_iterator;

/// \brief A partition of the slices.
///
/// An ephemeral representation for a range of slices which can be viewed as
/// a partition of the alloca. This range represents a span of the alloca's
/// memory which cannot be split, and provides access to all of the slices
/// overlapping some part of the partition.
///
/// Objects of this type are produced by traversing the alloca's slices, but
/// are only ephemeral and not persistent.
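///
/// For illustration (not a normative contract): an alloca covered by
/// a splittable memset slice over [0,16) and an unsplittable i32 store
/// slice over [4,8) would yield the partitions [0,4), [4,8), and [8,16),
/// with the memset slice showing up as a split slice of the latter two.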
class Partition {
private:
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;

/// \brief The beginning and ending offsets of this partition within the
/// alloca.
uint64_t BeginOffset, EndOffset;

/// \brief The start and end iterators of this partition.
iterator SI, SJ;

/// \brief A collection of split slices.
SmallVector<Slice *, 4> SplitSlices;

/// \brief Raw constructor builds an empty partition starting and ending at
/// the given iterator.
Partition(iterator SI) : SI(SI), SJ(SI) {}

public:
/// \brief The start offset of this partition.
///
/// All of the contained slices start at or after this offset.
uint64_t beginOffset() const { return BeginOffset; }

/// \brief The end offset of this partition.
///
/// All of the contained slices end at or before this offset.
uint64_t endOffset() const { return EndOffset; }

/// \brief The size of the partition.
///
/// Note that this can never be zero.
uint64_t size() const {
assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
return EndOffset - BeginOffset;
}

/// \brief Test whether this partition contains no slices, and merely spans
/// a region occupied by split slices.
bool empty() const { return SI == SJ; }

/// \name Iterate contained slices.
/// All of these slices are fully contained in the partition. They may be
/// splittable or unsplittable.
/// @{
iterator begin() const { return SI; }
iterator end() const { return SJ; }
/// @}

/// \brief Get the sequence of split slices.
ArrayRef<Slice *> splitSlices() const { return SplitSlices; }
};

/// \brief An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
/// slices. It is a forward iterator, as we don't support backtracking (for
/// efficiency reasons), and it re-uses a single storage area to maintain
/// the current set of split slices.
///
/// It is templated on the slice iterator type to use so that it can operate
/// with either const or non-const slice iterators.
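///
/// As a sketch of the traversal (illustrative, not normative): given an
/// unsplittable slice [0,4) and a splittable slice [0,8), this iterator
/// first yields the partition [0,4) containing both slices, and then an
/// empty partition [4,8) whose only use is the split tail of the
/// splittable slice.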
class partition_iterator
: public iterator_facade_base<partition_iterator,
std::forward_iterator_tag, Partition> {
friend class AllocaSlices;

/// \brief Most of the state for walking the partitions is held in a class
/// with a nice interface for examining them.
Partition P;

/// \brief We need to keep the end of the slices to know when to stop.
AllocaSlices::iterator SE;

/// \brief We also need to keep track of the maximum split end offset seen.
/// FIXME: Do we really?
uint64_t MaxSplitSliceEndOffset;

/// \brief Sets the partition to be empty at the given iterator, and sets
/// the end iterator.
partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
: P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
// If not already at the end, advance our state to form the initial
// partition.
if (SI != SE)
advance();
}

/// \brief Advance the iterator to the next partition.
///
/// Requires that the iterator not be at the end of the slices.
void advance() {
assert((P.SI != SE || !P.SplitSlices.empty()) &&
"Cannot advance past the end of the slices!");

// Clear out any split uses which have ended.
if (!P.SplitSlices.empty()) {
if (P.EndOffset >= MaxSplitSliceEndOffset) {
// If we've finished all splits, this is easy.
P.SplitSlices.clear();
MaxSplitSliceEndOffset = 0;
} else {
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
P.SplitSlices.erase(
std::remove_if(
P.SplitSlices.begin(), P.SplitSlices.end(),
[&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
P.SplitSlices.end());
assert(std::any_of(P.SplitSlices.begin(), P.SplitSlices.end(),
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
}) &&
"Could not find the current max split slice offset!");
assert(std::all_of(P.SplitSlices.begin(), P.SplitSlices.end(),
[&](Slice *S) {
return S->endOffset() <= MaxSplitSliceEndOffset;
}) &&
"Max split slice end offset is not actually the max!");
}
}

// If P.SI is already at the end, then we've cleared the split tail and
// now have an end iterator.
if (P.SI == SE) {
assert(P.SplitSlices.empty() && "Failed to clear the split slices!");
return;
}

// If we had a non-empty partition previously, set up the state for
// subsequent partitions.
if (P.SI != P.SJ) {
// Accumulate all the splittable slices which started in the old
// partition into the split list.
for (Slice &S : P)
if (S.isSplittable() && S.endOffset() > P.EndOffset) {
P.SplitSlices.push_back(&S);
MaxSplitSliceEndOffset =
std::max(S.endOffset(), MaxSplitSliceEndOffset);
}

// Start from the end of the previous partition.
P.SI = P.SJ;

// If P.SI is now at the end, we at most have a tail of split slices.
if (P.SI == SE) {
P.BeginOffset = P.EndOffset;
P.EndOffset = MaxSplitSliceEndOffset;
return;
}

// If we have split slices and the next slice is after a gap and is
// not splittable, immediately form an empty partition for the split
// slices up until the next slice begins.
if (!P.SplitSlices.empty() && P.SI->beginOffset() != P.EndOffset &&
!P.SI->isSplittable()) {
P.BeginOffset = P.EndOffset;
P.EndOffset = P.SI->beginOffset();
return;
}
}

// OK, we need to consume new slices. Set the end offset based on the
// current slice, and step SJ past it. The beginning offset of the
// partition is the beginning offset of the next slice unless we have
// pre-existing split slices that are continuing, in which case we begin
// at the prior end offset.
P.BeginOffset = P.SplitSlices.empty() ? P.SI->beginOffset() : P.EndOffset;
P.EndOffset = P.SI->endOffset();
++P.SJ;

// There are two strategies to form a partition based on whether the
// partition starts with an unsplittable slice or a splittable slice.
if (!P.SI->isSplittable()) {
// When we're forming an unsplittable region, it must always start at
// the first slice and will extend through its end.
assert(P.BeginOffset == P.SI->beginOffset());

// Form a partition including all of the overlapping slices with this
// unsplittable slice.
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
if (!P.SJ->isSplittable())
P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
++P.SJ;
}

// We have a partition across a set of overlapping unsplittable
// slices.
return;
}

// If we're starting with a splittable slice, then we need to form
// a synthetic partition spanning it and any other overlapping splittable
// slices.
assert(P.SI->isSplittable() && "Forming a splittable partition!");

// Collect all of the overlapping splittable slices.
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
P.SJ->isSplittable()) {
P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
++P.SJ;
}

// Back up P.EndOffset if we ended the span early when encountering an
// unsplittable slice. This synthesizes the early end offset of
// a partition spanning only splittable slices.
if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
assert(!P.SJ->isSplittable());
P.EndOffset = P.SJ->beginOffset();
}
}

public:
bool operator==(const partition_iterator &RHS) const {
assert(SE == RHS.SE &&
"End iterators don't match between compared partition iterators!");

// The observed position of a partition is marked by the P.SI iterator and
// the emptiness of the split slices. The latter is only relevant when
// P.SI == SE, as the end iterator will additionally have an empty split
// slices list, but the prior partition may have the same P.SI and a tail
// of split slices.
if (P.SI == RHS.P.SI &&
P.SplitSlices.empty() == RHS.P.SplitSlices.empty()) {
assert(P.SJ == RHS.P.SJ &&
"Same set of slices formed two different sized partitions!");
assert(P.SplitSlices.size() == RHS.P.SplitSlices.size() &&
"Same slice position with differently sized non-empty split "
"slices sets!");
return true;
}
return false;
}

partition_iterator &operator++() {
advance();
return *this;
}

Partition &operator*() { return P; }
};

/// \brief A forward range over the partitions of the alloca's slices.
///
/// This accesses an iterator range over the partitions of the alloca's
/// slices. It computes these partitions on the fly based on the overlapping
/// offsets of the slices and the ability to split them. It will visit "empty"
/// partitions to cover regions of the alloca only accessed via split
/// slices.
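///
/// A minimal usage sketch (illustrative; process() is a placeholder for
/// whatever per-partition work the caller performs):
/// \code
///   for (Partition &P : AS.partitions())
///     process(P.beginOffset(), P.endOffset(), P.splitSlices());
/// \endcode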
iterator_range<partition_iterator> partitions() {
return make_range(partition_iterator(begin(), end()),
partition_iterator(end(), end()));
}

/// \brief Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }

@@ -974,9 +1241,7 @@ class SROA : public FunctionPass {
friend class AllocaSliceRewriter;

bool rewritePartition(AllocaInst &AI, AllocaSlices &AS,
AllocaSlices::iterator B, AllocaSlices::iterator E,
int64_t BeginOffset, int64_t EndOffset,
ArrayRef<AllocaSlices::iterator> SplitUses);
AllocaSlices::Partition &P);
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
@@ -3171,39 +3436,36 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
AllocaSlices::iterator B, AllocaSlices::iterator E,
int64_t BeginOffset, int64_t EndOffset,
ArrayRef<AllocaSlices::iterator> SplitUses) {
assert(BeginOffset < EndOffset);
uint64_t SliceSize = EndOffset - BeginOffset;

AllocaSlices::Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
SliceTy = CommonUseTy;
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
BeginOffset, SliceSize))
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL->isLegalInteger(SliceSize * 8))
SliceTy = Type::getIntNTy(*C, SliceSize * 8);
DL->isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), SliceSize);
assert(DL->getTypeAllocSize(SliceTy) >= SliceSize);
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
assert(DL->getTypeAllocSize(SliceTy) >= P.size());

bool IsIntegerPromotable = isIntegerWideningViable(
*DL, SliceTy, BeginOffset, AllocaSlices::const_range(B, E), SplitUses);
*DL, SliceTy, P.beginOffset(),
AllocaSlices::const_range(P.begin(), P.end()), P.splitSlices());

VectorType *VecTy =
IsIntegerPromotable
? nullptr
: isVectorPromotionViable(*DL, BeginOffset, EndOffset,
AllocaSlices::const_range(B, E), SplitUses);
: isVectorPromotionViable(
*DL, P.beginOffset(), P.endOffset(),
AllocaSlices::const_range(P.begin(), P.end()), P.splitSlices());
if (VecTy)
SliceTy = VecTy;

@@ -3213,7 +3475,8 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// perform phi and select speculation.
AllocaInst *NewAI;
if (SliceTy == AI.getAllocatedType()) {
assert(BeginOffset == 0 && "Non-zero begin offset but same alloca type");
assert(P.beginOffset() == 0 &&
"Non-zero begin offset but same alloca type");
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
@@ -3225,14 +3488,14 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// type.
Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
}
Alignment = MinAlign(Alignment, BeginOffset);
Alignment = MinAlign(Alignment, P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI =
new AllocaInst(SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(B - AS.begin()), &AI);
NewAI = new AllocaInst(
SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
++NumNewAllocas;

// Migrate debug information from the old alloca to the new alloca
@@ -3247,9 +3510,9 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// with a piece. It would be even better to just compare against the size
// of the type described in the debug info, but then we would need to
// build an expensive DIRefMap.
if (SliceSize < DL->getTypeAllocSize(AI.getAllocatedType()) ||
if (P.size() < DL->getTypeAllocSize(AI.getAllocatedType()) ||
DIExpression(DbgDecl->getExpression()).isVariablePiece())
Piece = DIB.createPieceExpression(BeginOffset, SliceSize);
Piece = DIB.createPieceExpression(P.beginOffset(), P.size());
Instruction *NewDDI = DIB.insertDeclare(NewAI, Var, Piece, &AI);
NewDDI->setDebugLoc(DbgDecl->getDebugLoc());
DbgDeclares.insert(std::make_pair(NewAI, cast<DbgDeclareInst>(NewDDI)));
@@ -3258,8 +3521,8 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
}

DEBUG(dbgs() << "Rewriting alloca partition "
<< "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
<< "\n");
<< "[" << P.beginOffset() << "," << P.endOffset()
<< ") to: " << *NewAI << "\n");

// Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
Expand All @@ -3269,20 +3532,20 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;

AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, BeginOffset,
EndOffset, IsIntegerPromotable, VecTy, PHIUsers,
SelectUsers);
AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
for (auto &SplitUse : SplitUses) {
for (Slice *S : P.splitSlices()) {
DEBUG(dbgs() << " rewriting split ");
DEBUG(AS.printSlice(dbgs(), SplitUse, ""));
Promotable &= Rewriter.visit(SplitUse);
DEBUG(AS.printSlice(dbgs(), S, ""));
Promotable &= Rewriter.visit(S);
++NumUses;
}
for (AllocaSlices::iterator I = B; I != E; ++I) {
for (Slice &S : P) {
DEBUG(dbgs() << " rewriting ");
DEBUG(AS.printSlice(dbgs(), I, ""));
Promotable &= Rewriter.visit(I);
DEBUG(AS.printSlice(dbgs(), &S, ""));
Promotable &= Rewriter.visit(&S);
++NumUses;
}

@@ -3340,32 +3603,6 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
return true;
}

static void
removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
uint64_t &MaxSplitUseEndOffset, uint64_t Offset) {
if (Offset >= MaxSplitUseEndOffset) {
SplitUses.clear();
MaxSplitUseEndOffset = 0;
return;
}

size_t SplitUsesOldSize = SplitUses.size();
SplitUses.erase(std::remove_if(
SplitUses.begin(), SplitUses.end(),
[Offset](const AllocaSlices::iterator &I) {
return I->endOffset() <= Offset;
}),
SplitUses.end());
if (SplitUsesOldSize == SplitUses.size())
return;

// Recompute the max. While this is linear, so is remove_if.
MaxSplitUseEndOffset = 0;
for (AllocaSlices::iterator SplitUse : SplitUses)
MaxSplitUseEndOffset =
std::max(SplitUse->endOffset(), MaxSplitUseEndOffset);
}

/// \brief Walks the slices of an alloca and forms partitions based on them,
/// rewriting each of their uses.
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
@@ -3374,102 +3611,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {

unsigned NumPartitions = 0;
bool Changed = false;
SmallVector<AllocaSlices::iterator, 4> SplitUses;
uint64_t MaxSplitUseEndOffset = 0;

uint64_t BeginOffset = AS.begin()->beginOffset();

for (AllocaSlices::iterator SI = AS.begin(), SJ = std::next(SI),
SE = AS.end();
SI != SE; SI = SJ) {
uint64_t MaxEndOffset = SI->endOffset();

if (!SI->isSplittable()) {
// When we're forming an unsplittable region, it must always start at the
// first slice and will extend through its end.
assert(BeginOffset == SI->beginOffset());

// Form a partition including all of the overlapping slices with this
// unsplittable slice.
while (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
if (!SJ->isSplittable())
MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
++SJ;
}
} else {
assert(SI->isSplittable()); // Established above.

// Collect all of the overlapping splittable slices.
while (SJ != SE && SJ->beginOffset() < MaxEndOffset &&
SJ->isSplittable()) {
MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
++SJ;
}

// Back up MaxEndOffset and SJ if we ended the span early when
// encountering an unsplittable slice.
if (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
assert(!SJ->isSplittable());
MaxEndOffset = SJ->beginOffset();
}
}

// Check if we have managed to move the end offset forward yet. If so,
// we'll have to rewrite uses and erase old split uses.
if (BeginOffset < MaxEndOffset) {
// Rewrite a sequence of overlapping slices.
Changed |= rewritePartition(AI, AS, SI, SJ, BeginOffset, MaxEndOffset,
SplitUses);
++NumPartitions;

removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset, MaxEndOffset);
}

// Accumulate all the splittable slices from the [SI,SJ) region which
// overlap going forward.
for (AllocaSlices::iterator SK = SI; SK != SJ; ++SK)
if (SK->isSplittable() && SK->endOffset() > MaxEndOffset) {
SplitUses.push_back(SK);
MaxSplitUseEndOffset = std::max(SK->endOffset(), MaxSplitUseEndOffset);
}

// If we're already at the end and we have no split uses, we're done.
if (SJ == SE && SplitUses.empty())
break;

// If we have no split uses or no gap in offsets, we're ready to move to
// the next slice.
if (SplitUses.empty() || (SJ != SE && MaxEndOffset == SJ->beginOffset())) {
BeginOffset = SJ->beginOffset();
continue;
}

// Even if we have split slices, if the next slice is splittable and the
// split slices reach it, we can simply set up the beginning offset of the
// next iteration to bridge between them.
if (SJ != SE && SJ->isSplittable() &&
MaxSplitUseEndOffset > SJ->beginOffset()) {
BeginOffset = MaxEndOffset;
continue;
}

// Otherwise, we have a tail of split slices. Rewrite them with an empty
// range of slices.
uint64_t PostSplitEndOffset =
SJ == SE ? MaxSplitUseEndOffset : SJ->beginOffset();

Changed |= rewritePartition(AI, AS, SJ, SJ, MaxEndOffset,
PostSplitEndOffset, SplitUses);
// Rewrite each partition.
for (auto &P : AS.partitions()) {
Changed |= rewritePartition(AI, AS, P);
++NumPartitions;

if (SJ == SE)
break; // Skip the rest, we don't need to do any cleanup.

removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset,
PostSplitEndOffset);

// Now just reset the begin offset for the next iteration.
BeginOffset = SJ->beginOffset();
}

NumAllocaPartitions += NumPartitions;