diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index daea3bdce6889..e0e3648f718eb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -980,6 +980,14 @@ class BoUpSLP {
   class ShuffleInstructionBuilder;
 
 public:
+  /// Tracks the state we can represent the loads in the given sequence.
+  enum class LoadsState {
+    Gather,
+    Vectorize,
+    ScatterVectorize,
+    StridedVectorize
+  };
+
   using ValueList = SmallVector<Value *, 8>;
   using InstrList = SmallVector<Instruction *, 16>;
   using ValueSet = SmallPtrSet<Value *, 16>;
@@ -1184,6 +1192,19 @@ class BoUpSLP {
   /// may not be necessary.
   bool isLoadCombineCandidate() const;
 
+  /// Checks if the given array of loads can be represented as a vectorized,
+  /// scatter or just simple gather.
+  /// \param VL list of loads.
+  /// \param VL0 main load value.
+  /// \param Order returned order of load instructions.
+  /// \param PointerOps returned list of pointer operands.
+  /// \param TryRecursiveCheck used to check if long masked gather can be
+  /// represented as a series of loads/insert subvector, if profitable.
+  LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
+                               SmallVectorImpl<unsigned> &Order,
+                               SmallVectorImpl<Value *> &PointerOps,
+                               bool TryRecursiveCheck = true) const;
+
   OptimizationRemarkEmitter *getORE() { return ORE; }
 
   /// This structure holds any data we need about the edges being traversed
@@ -3957,11 +3978,6 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
   return std::move(CurrentOrder);
 }
 
-namespace {
-/// Tracks the state we can represent the loads in the given sequence.
-enum class LoadsState { Gather, Vectorize, ScatterVectorize, StridedVectorize };
-} // anonymous namespace
-
 static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
                                   const TargetLibraryInfo &TLI,
                                   bool CompareOpcodes = true) {
@@ -3998,16 +4014,9 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
   });
 }
 
-/// Checks if the given array of loads can be represented as a vectorized,
-/// scatter or just simple gather.
-static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
-                                    const Value *VL0,
-                                    const TargetTransformInfo &TTI,
-                                    const DataLayout &DL, ScalarEvolution &SE,
-                                    LoopInfo &LI, const TargetLibraryInfo &TLI,
-                                    SmallVectorImpl<unsigned> &Order,
-                                    SmallVectorImpl<Value *> &PointerOps,
-                                    bool TryRecursiveCheck = true) {
+BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
+    ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
+    SmallVectorImpl<Value *> &PointerOps, bool TryRecursiveCheck) const {
   // Check that a vectorized load would load the same memory as a scalar
   // load. For example, we don't want to vectorize loads that are smaller
   // than 8-bit. Even though we have a packed struct {<i2, i2>} LLVM
@@ -4016,7 +4025,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
   // unvectorized version.
   Type *ScalarTy = VL0->getType();
 
-  if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
+  if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy))
     return LoadsState::Gather;
 
   // Make sure all loads in the bundle are simple - we can't vectorize
@@ -4036,9 +4045,9 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
   Order.clear();
   auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
   // Check the order of pointer operands or that all pointers are the same.
-  bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+  bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
   if (IsSorted || all_of(PointerOps, [&](Value *P) {
-        return arePointersCompatible(P, PointerOps.front(), TLI);
+        return arePointersCompatible(P, PointerOps.front(), *TLI);
       })) {
     if (IsSorted) {
       Value *Ptr0;
@@ -4051,7 +4060,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
         PtrN = PointerOps[Order.back()];
       }
       std::optional<int> Diff =
-          getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
+          getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
       // Check that the sorted loads are consecutive.
       if (static_cast<unsigned>(*Diff) == Sz - 1)
         return LoadsState::Vectorize;
@@ -4078,7 +4087,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
           Align Alignment =
               cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
                   ->getAlign();
-          if (TTI.isLegalStridedLoadStore(VecTy, Alignment)) {
+          if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
             // Iterate through all pointers and check if all distances are
             // unique multiple of Dist.
             SmallSet<int, 4> Dists;
@@ -4087,7 +4096,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
               if (Ptr == PtrN)
                 Dist = *Diff;
              else if (Ptr != Ptr0)
-                Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+                Dist =
+                    *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
               // If the strides are not the same or repeated, we can't
               // vectorize.
               if (((Dist / Stride) * Stride) != Dist ||
@@ -4100,11 +4110,11 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
       }
     }
   }
-  auto CheckForShuffledLoads = [&](Align CommonAlignment) {
-    unsigned Sz = DL.getTypeSizeInBits(ScalarTy);
-    unsigned MinVF = R.getMinVF(Sz);
+  auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment) {
+    unsigned Sz = DL->getTypeSizeInBits(ScalarTy);
+    unsigned MinVF = getMinVF(Sz);
     unsigned MaxVF = std::max<unsigned>(bit_floor(VL.size() / 2), MinVF);
-    MaxVF = std::min(R.getMaximumVF(Sz, Instruction::Load), MaxVF);
+    MaxVF = std::min(getMaximumVF(Sz, Instruction::Load), MaxVF);
     for (unsigned VF = MaxVF; VF >= MinVF; VF /= 2) {
       unsigned VectorizedCnt = 0;
       SmallVector<LoadsState> States;
@@ -4114,8 +4124,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
         SmallVector<unsigned> Order;
         SmallVector<Value *> PointerOps;
         LoadsState LS =
-            canVectorizeLoads(R, Slice, Slice.front(), TTI, DL, SE, LI, TLI,
-                              Order, PointerOps, /*TryRecursiveCheck=*/false);
+            canVectorizeLoads(Slice, Slice.front(), Order, PointerOps,
+                              /*TryRecursiveCheck=*/false);
         // Check that the sorted loads are consecutive.
         if (LS == LoadsState::Gather)
           break;
@@ -4175,7 +4185,7 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
   // TODO: need to improve analysis of the pointers, if not all of them are
   // GEPs or have > 2 operands, we end up with a gather node, which just
   // increases the cost.
-  Loop *L = LI.getLoopFor(cast<LoadInst>(VL0)->getParent());
+  Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
   bool ProfitableGatherPointers =
       L && Sz > 2 && count_if(PointerOps, [L](Value *V) {
                        return L->isLoopInvariant(V);
@@ -4187,8 +4197,8 @@ static LoadsState canVectorizeLoads(const BoUpSLP &R, ArrayRef<Value *> VL,
                 isa<Constant, Instruction>(GEP->getOperand(1)));
       })) {
     Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
-    if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&
-        !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
+    if (TTI->isLegalMaskedGather(VecTy, CommonAlignment) &&
+        !TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
       // Check if potential masked gather can be represented as series
       // of loads + insertsubvectors.
      if (TryRecursiveCheck && CheckForShuffledLoads(CommonAlignment)) {
@@ -5635,8 +5645,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     // treats loading/storing it as an i8 struct. If we vectorize loads/stores
     // from such a struct, we read/write packed bits disagreeing with the
     // unvectorized version.
-    switch (canVectorizeLoads(*this, VL, VL0, *TTI, *DL, *SE, *LI, *TLI,
-                              CurrentOrder, PointerOps)) {
+    switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
     case LoadsState::Vectorize:
       return TreeEntry::Vectorize;
     case LoadsState::ScatterVectorize:
@@ -7416,9 +7425,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
             !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
           SmallVector<Value *> PointerOps;
           OrdersType CurrentOrder;
-          LoadsState LS =
-              canVectorizeLoads(R, Slice, Slice.front(), TTI, *R.DL, *R.SE,
-                                *R.LI, *R.TLI, CurrentOrder, PointerOps);
+          LoadsState LS = R.canVectorizeLoads(Slice, Slice.front(),
+                                              CurrentOrder, PointerOps);
           switch (LS) {
           case LoadsState::Vectorize:
           case LoadsState::ScatterVectorize:
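
For context on the API this patch promotes into BoUpSLP, the sketch below is a minimal, self-contained illustration and not part of the patch: it mirrors the shape of BoUpSLP::LoadsState and shows how a caller such as getScalarsVectorizationState dispatches on it. The mapping of states to lowerings follows the checks in canVectorizeLoads (consecutive pointers yield Vectorize, a legal strided load yields StridedVectorize, a legal masked gather yields ScatterVectorize, and everything else falls back to Gather); describeLowering and main are hypothetical scaffolding for illustration only.

// Minimal sketch (not LLVM code); it only mirrors the BoUpSLP::LoadsState
// API introduced above. All surrounding names are hypothetical stand-ins.
#include <cstdio>

// Mirrors BoUpSLP::LoadsState from the patch.
enum class LoadsState { Gather, Vectorize, ScatterVectorize, StridedVectorize };

// Maps each classification to the lowering a caller would pick, following the
// checks in canVectorizeLoads: consecutive pointers -> one wide load, legal
// strided load/store -> strided load, legal masked gather -> gather,
// otherwise keep the scalar loads and gather their results.
const char *describeLowering(LoadsState LS) {
  switch (LS) {
  case LoadsState::Vectorize:
    return "emit one consecutive wide vector load";
  case LoadsState::StridedVectorize:
    return "emit a strided load (constant stride between pointers)";
  case LoadsState::ScatterVectorize:
    return "emit a masked gather from the pointer vector";
  case LoadsState::Gather:
    return "keep scalar loads and build the vector from their results";
  }
  return "unknown";
}

int main() {
  std::printf("%s\n", describeLowering(LoadsState::Vectorize));
  return 0;
}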