108 changes: 53 additions & 55 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2199,6 +2199,7 @@ class BoUpSLP {
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
const SCEV **StrideSCEV = nullptr,
unsigned *BestVF = nullptr,
bool TryRecursiveCheck = true) const;

@@ -3879,6 +3880,10 @@ class BoUpSLP {
};
EntryState State;

/// If we decide to generate a strided load for this entry, `StrideSCEV`
/// holds the SCEV of the run-time stride.
const SCEV *StrideSCEV = nullptr;

/// List of combined opcodes supported by the vectorizer.
enum CombinedOpcode {
NotCombinedOp = -1,
@@ -4430,11 +4435,10 @@ class BoUpSLP {

/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
TreeEntry::EntryState
getScalarsVectorizationState(const InstructionsState &S, ArrayRef<Value *> VL,
bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder,
SmallVectorImpl<Value *> &PointerOps);
TreeEntry::EntryState getScalarsVectorizationState(
const InstructionsState &S, ArrayRef<Value *> VL,
bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV);

/// Maps a specific scalar to its tree entry(ies).
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
@@ -5734,17 +5738,13 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
}

/// Checks if the provided list of pointers \p Pointers represents the strided
/// pointers for type ElemTy. If they are not, std::nullopt is returned.
/// Otherwise, if \p Inst is not specified, just initialized optional value is
/// returned to show that the pointers represent strided pointers. If \p Inst
/// specified, the runtime stride is materialized before the given \p Inst.
/// \returns std::nullopt if the pointers are not pointers with the runtime
/// stride, nullptr or actual stride value, otherwise.
static std::optional<Value *>
calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices,
Instruction *Inst = nullptr) {
/// pointers for type ElemTy. If they are not, `false` is returned.
/// Otherwise, `true` is returned. If `StrideSCEV` is not nullptr, it is
/// set to the SCEV of the run-time stride.
static bool calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices,
const SCEV **StrideSCEV) {
SmallVector<const SCEV *> SCEVs;
const SCEV *PtrSCEVLowest = nullptr;
const SCEV *PtrSCEVHighest = nullptr;
@@ -5753,22 +5753,22 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
for (Value *Ptr : PointerOps) {
const SCEV *PtrSCEV = SE.getSCEV(Ptr);
if (!PtrSCEV)
return std::nullopt;
return false;
SCEVs.push_back(PtrSCEV);
if (!PtrSCEVLowest && !PtrSCEVHighest) {
PtrSCEVLowest = PtrSCEVHighest = PtrSCEV;
continue;
}
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Diff))
return std::nullopt;
return false;
if (Diff->isNonConstantNegative()) {
PtrSCEVLowest = PtrSCEV;
continue;
}
const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEV);
if (isa<SCEVCouldNotCompute>(Diff1))
return std::nullopt;
return false;
if (Diff1->isNonConstantNegative()) {
PtrSCEVHighest = PtrSCEV;
continue;
@@ -5777,7 +5777,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
// Dist = PtrSCEVHighest - PtrSCEVLowest;
const SCEV *Dist = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Dist))
return std::nullopt;
return false;
int Size = DL.getTypeStoreSize(ElemTy);
auto TryGetStride = [&](const SCEV *Dist,
const SCEV *Multiplier) -> const SCEV * {
@@ -5798,10 +5798,10 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const SCEV *Sz = SE.getConstant(Dist->getType(), Size * (SCEVs.size() - 1));
Stride = TryGetStride(Dist, Sz);
if (!Stride)
return std::nullopt;
return false;
}
if (!Stride || isa<SCEVConstant>(Stride))
return std::nullopt;
return false;
// Iterate through all pointers and check if all distances are
// unique multiple of Stride.
using DistOrdPair = std::pair<int64_t, int>;
@@ -5815,28 +5815,28 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
const SCEV *Coeff = TryGetStride(Diff, Stride);
if (!Coeff)
return std::nullopt;
return false;
const auto *SC = dyn_cast<SCEVConstant>(Coeff);
if (!SC || isa<SCEVCouldNotCompute>(SC))
return std::nullopt;
return false;
if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest,
SE.getMulExpr(Stride, SC)))
->isZero())
return std::nullopt;
return false;
Dist = SC->getAPInt().getZExtValue();
}
// If the strides are not the same or repeated, we can't vectorize.
if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
return std::nullopt;
return false;
auto Res = Offsets.emplace(Dist, Cnt);
if (!Res.second)
return std::nullopt;
return false;
// Consecutive order if the inserted element is the last one.
IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
++Cnt;
}
if (Offsets.size() != SCEVs.size())
return std::nullopt;
return false;
SortedIndices.clear();
if (!IsConsecutive) {
// Fill SortedIndices array only if it is non-consecutive.
@@ -5847,10 +5847,9 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
++Cnt;
}
}
if (!Inst)
return nullptr;
SCEVExpander Expander(SE, DL, "strided-load-vec");
return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
if (StrideSCEV)
*StrideSCEV = Stride;
return true;
}

static std::pair<InstructionCost, InstructionCost>
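The contract change above is the heart of the patch: calculateRtStride() no longer materializes the stride with SCEVExpander; it only analyzes the pointers and reports the stride as a SCEV through the out-parameter. A minimal sketch of how a caller now splits analysis from expansion — PointerOps, ScalarTy, DL, SE, and InsertPt stand in for the caller's own values:

SmallVector<unsigned> SortedIndices;
const SCEV *StrideSCEV = nullptr;
if (calculateRtStride(PointerOps, ScalarTy, DL, SE, SortedIndices,
                      &StrideSCEV)) {
  // Record StrideSCEV (e.g. on the TreeEntry) and expand it only when the
  // strided load is actually emitted, mirroring what vectorizeTree() does.
  SCEVExpander Expander(SE, DL, "strided-load-vec");
  Value *Stride =
      Expander.expandCodeFor(StrideSCEV, StrideSCEV->getType(), InsertPt);
}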
@@ -6246,11 +6245,10 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
return false;
}

BoUpSLP::LoadsState
BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
unsigned *BestVF, bool TryRecursiveCheck) const {
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV,
unsigned *BestVF, bool TryRecursiveCheck) const {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
@@ -6289,7 +6287,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
if (!IsSorted) {
if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
if (TTI->isLegalStridedLoadStore(VecTy, CommonAlignment) &&
calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order))
calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order, StrideSCEV))
return LoadsState::StridedVectorize;
}

@@ -6418,9 +6416,9 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
SmallVector<unsigned> Order;
SmallVector<Value *> PointerOps;
LoadsState LS =
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, BestVF,
/*TryRecursiveCheck=*/false);
LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order,
PointerOps, StrideSCEV, BestVF,
/*TryRecursiveCheck=*/false);
// Check that the sorted loads are consecutive.
if (LS == LoadsState::Gather) {
if (BestVF) {
@@ -8565,8 +8563,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
// Try to build vector load.
ArrayRef<Value *> Values(
reinterpret_cast<Value *const *>(Slice.begin()), Slice.size());
LoadsState LS = canVectorizeLoads(Values, Slice.front(), CurrentOrder,
PointerOps, &BestVF);
LoadsState LS =
canVectorizeLoads(Values, Slice.front(), CurrentOrder, PointerOps,
/*StrideSCEV=*/nullptr, &BestVF);
if (LS != LoadsState::Gather ||
(BestVF > 1 && static_cast<unsigned>(NumElts) == 2 * BestVF)) {
if (LS == LoadsState::ScatterVectorize) {
@@ -9171,7 +9170,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
const InstructionsState &S, ArrayRef<Value *> VL,
bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
SmallVectorImpl<Value *> &PointerOps) {
SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV) {
assert(S.getMainOp() &&
"Expected instructions with same/alternate opcodes only.");

@@ -9273,7 +9272,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
});
});
};
switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps, StrideSCEV)) {
case LoadsState::Vectorize:
return TreeEntry::Vectorize;
case LoadsState::CompressVectorize:
@@ -10710,8 +10709,9 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
OrdersType CurrentOrder;
SmallVector<Value *> PointerOps;
const SCEV *StrideSCEV = nullptr;
TreeEntry::EntryState State = getScalarsVectorizationState(
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps, &StrideSCEV);
if (State == TreeEntry::NeedToGather) {
newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
@@ -10871,6 +10871,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
TE->StrideSCEV = StrideSCEV;
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (strided LoadInst).\n";
TE->dump());
break;
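Taken together, the two buildTreeRec hunks above thread the stride through the tree: the SCEV is computed once while the tree is built and stored on the entry, then consumed at codegen time. A condensed view of that hand-off (simplified from the hunks, not verbatim):

const SCEV *StrideSCEV = nullptr;
TreeEntry::EntryState State = getScalarsVectorizationState(
    S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps, &StrideSCEV);
// ... later, when a strided-load entry is created for these scalars:
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S, UserTreeIdx,
                  ReuseShuffleIndices, CurrentOrder);
TE->StrideSCEV = StrideSCEV; // read back in vectorizeTree(TreeEntry *)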
@@ -18711,16 +18712,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
DL->getTypeAllocSize(ScalarTy));
} else {
SmallVector<Value *> PointerOps(E->Scalars.size(), nullptr);
transform(E->Scalars, PointerOps.begin(), [](Value *V) {
return cast<LoadInst>(V)->getPointerOperand();
});
OrdersType Order;
std::optional<Value *> Stride =
calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order,
&*Builder.GetInsertPoint());
const SCEV *StrideSCEV = E->StrideSCEV;
assert(StrideSCEV && "Expected a run-time stride SCEV for strided load");
SCEVExpander Expander(*SE, *DL, "strided-load-vec");
Value *Stride = Expander.expandCodeFor(
StrideSCEV, StrideSCEV->getType(), &*Builder.GetInsertPoint());
Value *NewStride =
Builder.CreateIntCast(*Stride, StrideTy, /*isSigned=*/true);
Builder.CreateIntCast(Stride, StrideTy, /*isSigned=*/true);
StrideVal = Builder.CreateMul(
NewStride,
ConstantInt::get(
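For context, a minimal (assumed) source pattern that exercises this run-time-stride path — a group of loads whose addresses differ by a stride known only at run time:

int sum4(const int *a, long s) {
  // SLP can model these four loads as a single strided load (stride of s
  // elements, i.e. s * sizeof(int) bytes), provided the target reports
  // strided load/store as legal for the vector type.
  return a[0 * s] + a[1 * s] + a[2 * s] + a[3 * s];
}

With this patch, the stride for such a group is captured as a SCEV on the TreeEntry during tree construction and expanded to IR only if the strided load is actually emitted, instead of being rediscovered and materialized from the pointer operands during code generation, as the deleted branch above used to do.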