[SLP][NFC]Make canVectorizeLoads member of BoUpSLP class, NFC.
alexey-bataev committed Mar 4, 2024
1 parent 13a78fd commit 8982786
Showing 1 changed file with 43 additions and 35 deletions: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
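The change is a mechanical move: the free function canVectorizeLoads and its LoadsState result enum become members of BoUpSLP, so the analyses the function needs (TTI, DL, SE, LI, TLI) are read from class members instead of being threaded through every call site. Both signatures below are taken from the diff; only the surrounding class context is elided.

// Before: a free function, with every analysis passed explicitly.
static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
                                    const Value *VL0,
                                    const TargetTransformInfo &TTI,
                                    const DataLayout &DL, ScalarEvolution &SE,
                                    LoopInfo &LI, const TargetLibraryInfo &TLI,
                                    SmallVectorImpl<unsigned> &Order,
                                    SmallVectorImpl<Value *> &PointerOps,
                                    bool TryRecursiveCheck = true);

// After: a public const member; the analyses come from BoUpSLP's members,
// so callers pass only the load bundle and the output parameters.
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                             SmallVectorImpl<unsigned> &Order,
                             SmallVectorImpl<Value *> &PointerOps,
                             bool TryRecursiveCheck = true) const;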
@@ -980,6 +980,14 @@ class BoUpSLP {
class ShuffleInstructionBuilder;

public:
/// Tracks the state in which the loads in the given sequence can be represented.
enum class LoadsState {
Gather,
Vectorize,
ScatterVectorize,
StridedVectorize
};

using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
using ValueSet = SmallPtrSet<Value *, 16>;
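For orientation, the four LoadsState values correspond to pointer-offset patterns within a load bundle. The helper below is purely illustrative, not part of the patch (the real decision also consults target legality and cost); offsets are in elements, assumed sorted and pairwise distinct.

#include <vector>

enum class LoadsState { Gather, Vectorize, ScatterVectorize, StridedVectorize };

// Classify a bundle by its sorted, distinct element offsets from the first
// pointer. Real SLP additionally checks TTI legality and profitability.
LoadsState classifyOffsets(const std::vector<int> &Offsets) {
  const int N = static_cast<int>(Offsets.size());
  if (N < 2)
    return LoadsState::Gather;
  // Offsets {0, 1, ..., N-1}: one wide consecutive load.
  if (Offsets.back() - Offsets.front() == N - 1)
    return LoadsState::Vectorize;
  // One constant stride between all lanes: a strided load.
  const int Stride = Offsets[1] - Offsets[0];
  bool Strided = true;
  for (int I = 2; I < N; ++I)
    Strided &= (Offsets[I] - Offsets[I - 1] == Stride);
  if (Strided)
    return LoadsState::StridedVectorize;
  // Arbitrary offsets: a masked gather if the target supports one,
  // otherwise the bundle stays a Gather of scalar loads.
  return LoadsState::ScatterVectorize;
}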
@@ -1184,6 +1192,19 @@ class BoUpSLP {
/// may not be necessary.
bool isLoadCombineCandidate() const;

/// Checks if the given array of loads can be represented as a vectorized
/// load, a scattered/strided access, or just a simple gather.
/// \param VL list of loads.
/// \param VL0 main load value.
/// \param Order returned order of load instructions.
/// \param PointerOps returned list of pointer operands.
/// \param TryRecursiveCheck used to check whether a long masked gather can
/// be represented as a series of loads/insert-subvector operations, if
/// profitable.
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
bool TryRecursiveCheck = true) const;
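A hypothetical call site after the move (not in the patch; R and VL are illustrative names): only the load-specific arguments remain, everything else comes from the instance.

// Returns true when the whole bundle can become one wide consecutive load.
static bool bundleIsConsecutive(const BoUpSLP &R, ArrayRef<Value *> VL) {
  SmallVector<unsigned> Order;
  SmallVector<Value *> PointerOps;
  return R.canVectorizeLoads(VL, VL.front(), Order, PointerOps) ==
         BoUpSLP::LoadsState::Vectorize;
}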

OptimizationRemarkEmitter *getORE() { return ORE; }

/// This structure holds any data we need about the edges being traversed
@@ -3957,11 +3978,6 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
return std::move(CurrentOrder);
}

namespace {
/// Tracks the state we can represent the loads in the given sequence.
enum class LoadsState { Gather, Vectorize, ScatterVectorize, StridedVectorize };
} // anonymous namespace

static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
const TargetLibraryInfo &TLI,
bool CompareOpcodes = true) {
@@ -3998,16 +4014,9 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
});
}

/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
const Value *VL0,
const TargetTransformInfo &TTI,
const DataLayout &DL, ScalarEvolution &SE,
LoopInfo &LI, const TargetLibraryInfo &TLI,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
bool TryRecursiveCheck = true) {
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, bool TryRecursiveCheck) const {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
@@ -4016,7 +4025,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
// unvectorized version.
Type *ScalarTy = VL0->getType();

if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy))
return LoadsState::Gather;
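To make the guard above concrete: for an i2, DataLayout reports a stored size of 2 bits but an alloc size of 8 bits (a full byte per element), so a vector of i2 would read different memory than the scalar i2 loads, and the bundle is forced to Gather. A minimal standalone restatement of the check, with an assumed helper name:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

// True when a wide load over ScalarTy touches exactly the bytes the scalar
// loads would touch; false e.g. for i2, where 2 bits are stored but a full
// byte is allocated per element.
bool vectorLoadMatchesScalars(const llvm::DataLayout &DL,
                              llvm::Type *ScalarTy) {
  return DL.getTypeSizeInBits(ScalarTy) == DL.getTypeAllocSizeInBits(ScalarTy);
}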

// Make sure all loads in the bundle are simple - we can't vectorize
@@ -4036,9 +4045,9 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
Order.clear();
auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
// Check the order of pointer operands or that all pointers are the same.
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
if (IsSorted || all_of(PointerOps, [&](Value *P) {
return arePointersCompatible(P, PointerOps.front(), TLI);
return arePointersCompatible(P, PointerOps.front(), *TLI);
})) {
if (IsSorted) {
Value *Ptr0;
@@ -4051,7 +4060,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
PtrN = PointerOps[Order.back()];
}
std::optional<int> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted loads are consecutive.
if (static_cast<unsigned>(*Diff) == Sz - 1)
return LoadsState::Vectorize;
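A worked instance of the consecutive test, with assumed values: Sz pointers that are truly consecutive span Sz - 1 element gaps, and getPointersDiff reports the first-to-last distance in elements. For four loads at sorted element offsets 0, 1, 2, 3:

constexpr unsigned Sz = 4; // number of loads in the bundle
constexpr int Diff = 3;    // element distance between Ptr0 and PtrN
static_assert(static_cast<unsigned>(Diff) == Sz - 1,
              "sorted loads are consecutive -> LoadsState::Vectorize");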
@@ -4078,7 +4087,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
->getAlign();
if (TTI.isLegalStridedLoadStore(VecTy, Alignment)) {
if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
// Iterate through all pointers and check if all distances are
// unique multiple of Dist.
SmallSet<int, 4> Dists;
@@ -4087,7 +4096,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
if (Ptr == PtrN)
Dist = *Diff;
else if (Ptr != Ptr0)
Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
Dist =
*getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist ||
@@ -4100,11 +4110,11 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
}
}
}
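The strided path above accepts the bundle only if every pointer's distance from Ptr0 is a distinct multiple of the common stride, i.e. each lane occupies its own slot of the strided access. A free-standing sketch of that test (an assumed helper, not the patch's exact code):

#include <set>
#include <vector>

// Dists holds each pointer's element distance from Ptr0; Stride is the
// common stride candidate, assumed non-zero. Mirrors the
// ((Dist / Stride) * Stride) != Dist and Dists.insert checks above.
bool hasUniqueStridedSlots(const std::vector<int> &Dists, int Stride) {
  std::set<int> Seen;
  for (int Dist : Dists) {
    if ((Dist / Stride) * Stride != Dist) // not a multiple of the stride
      return false;
    if (!Seen.insert(Dist).second)        // repeated slot: two lanes collide
      return false;
  }
  return true;
}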
auto CheckForShuffledLoads = [&](Align CommonAlignment) {
unsigned Sz = DL.getTypeSizeInBits(ScalarTy);
unsigned MinVF = R.getMinVF(Sz);
auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment) {
unsigned Sz = DL->getTypeSizeInBits(ScalarTy);
unsigned MinVF = getMinVF(Sz);
unsigned MaxVF = std::max<unsigned>(bit_floor(VL.size() / 2), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, Instruction::Load), MaxVF);
MaxVF = std::min(getMaximumVF(Sz, Instruction::Load), MaxVF);
for (unsigned VF = MaxVF; VF >= MinVF; VF /= 2) {
unsigned VectorizedCnt = 0;
SmallVector<LoadsState> States;
@@ -4114,8 +4124,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
SmallVector<unsigned> Order;
SmallVector<Value *> PointerOps;
LoadsState LS =
canVectorizeLoads(R, Slice, Slice.front(), TTI, DL, SE, LI, TLI,
Order, PointerOps, /*TryRecursiveCheck=*/false);
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps,
/*TryRecursiveCheck=*/false);
// Check that the sorted loads are consecutive.
if (LS == LoadsState::Gather)
break;
@@ -4175,7 +4185,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
// TODO: need to improve analysis of the pointers, if not all of them are
// GEPs or have > 2 operands, we end up with a gather node, which just
// increases the cost.
Loop *L = LI.getLoopFor(cast<LoadInst>(VL0)->getParent());
Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
bool ProfitableGatherPointers =
L && Sz > 2 && count_if(PointerOps, [L](Value *V) {
return L->isLoopInvariant(V);
Expand All @@ -4187,8 +4197,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
isa<Constant, Instruction>(GEP->getOperand(1)));
})) {
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&
!TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
if (TTI->isLegalMaskedGather(VecTy, CommonAlignment) &&
!TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
// Check if potential masked gather can be represented as series
// of loads + insertsubvectors.
if (TryRecursiveCheck && CheckForShuffledLoads(CommonAlignment)) {
@@ -5635,8 +5645,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
switch (canVectorizeLoads(*this, VL, VL0, *TTI, *DL, *SE, *LI, *TLI,
CurrentOrder, PointerOps)) {
switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
case LoadsState::Vectorize:
return TreeEntry::Vectorize;
case LoadsState::ScatterVectorize:
@@ -7416,9 +7425,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
!VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
LoadsState LS =
canVectorizeLoads(R, Slice, Slice.front(), TTI, *R.DL, *R.SE,
*R.LI, *R.TLI, CurrentOrder, PointerOps);
LoadsState LS = R.canVectorizeLoads(Slice, Slice.front(),
CurrentOrder, PointerOps);
switch (LS) {
case LoadsState::Vectorize:
case LoadsState::ScatterVectorize:
