From 3f47ccb1d265f2f3281e244ad14b1618a7704fcb Mon Sep 17 00:00:00 2001
From: Mikhail Gudim
Date: Wed, 6 Aug 2025 09:11:07 -0700
Subject: [PATCH] [SLPVectorizer][NFC] Avoid calling `calculateRtStride` twice.

Before this patch, we called `calculateRtStride` once when checking
whether a run-time strided load is legal, and a second time when
generating the code. Instead, we add a `StrideSCEV` field to
`TreeEntry`: when `calculateRtStride` returns `true`, this field is set
to the SCEV of the run-time stride.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 108 +++++++++---------
 1 file changed, 53 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5d0e2f9518a51..c4198159e039a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2199,6 +2199,7 @@ class BoUpSLP {
   LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                                SmallVectorImpl<unsigned> &Order,
                                SmallVectorImpl<Value *> &PointerOps,
+                               const SCEV **StrideSCEV = nullptr,
                                unsigned *BestVF = nullptr,
                                bool TryRecursiveCheck = true) const;
 
@@ -3879,6 +3880,10 @@ class BoUpSLP {
     };
     EntryState State;
 
+    // If we decide to generate a strided load for this Entry, `StrideSCEV`
+    // will be set.
+    const SCEV *StrideSCEV = nullptr;
+
     /// List of combined opcodes supported by the vectorizer.
     enum CombinedOpcode {
       NotCombinedOp = -1,
@@ -4430,11 +4435,10 @@ class BoUpSLP {
   /// Checks if the specified list of the instructions/values can be vectorized
   /// and fills required data before actual scheduling of the instructions.
-  TreeEntry::EntryState
-  getScalarsVectorizationState(const InstructionsState &S, ArrayRef<Value *> VL,
-                               bool IsScatterVectorizeUserTE,
-                               OrdersType &CurrentOrder,
-                               SmallVectorImpl<Value *> &PointerOps);
+  TreeEntry::EntryState getScalarsVectorizationState(
+      const InstructionsState &S, ArrayRef<Value *> VL,
+      bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
+      SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV);
 
   /// Maps a specific scalar to its tree entry(ies).
   SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
@@ -5734,17 +5738,13 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
 }
 
 /// Checks if the provided list of pointers \p Pointers represents the strided
-/// pointers for type ElemTy. If they are not, std::nullopt is returned.
-/// Otherwise, if \p Inst is not specified, just initialized optional value is
-/// returned to show that the pointers represent strided pointers. If \p Inst
-/// specified, the runtime stride is materialized before the given \p Inst.
-/// \returns std::nullopt if the pointers are not pointers with the runtime
-/// stride, nullptr or actual stride value, otherwise.
-static std::optional<Value *>
-calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
-                  const DataLayout &DL, ScalarEvolution &SE,
-                  SmallVectorImpl<unsigned> &SortedIndices,
-                  Instruction *Inst = nullptr) {
+/// pointers for type ElemTy. If they are not, `false` is returned.
+/// Otherwise, `true` is returned. If `StrideSCEV` is not nullptr, it is
+/// set to the SCEV of the run-time stride.
+static bool calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
+                              const DataLayout &DL, ScalarEvolution &SE,
+                              SmallVectorImpl<unsigned> &SortedIndices,
+                              const SCEV **StrideSCEV) {
   SmallVector<const SCEV *> SCEVs;
   const SCEV *PtrSCEVLowest = nullptr;
   const SCEV *PtrSCEVHighest = nullptr;
@@ -5753,7 +5753,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
   for (Value *Ptr : PointerOps) {
     const SCEV *PtrSCEV = SE.getSCEV(Ptr);
     if (!PtrSCEV)
-      return std::nullopt;
+      return false;
     SCEVs.push_back(PtrSCEV);
     if (!PtrSCEVLowest && !PtrSCEVHighest) {
       PtrSCEVLowest = PtrSCEVHighest = PtrSCEV;
@@ -5761,14 +5761,14 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
     }
     const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
     if (isa<SCEVCouldNotCompute>(Diff))
-      return std::nullopt;
+      return false;
     if (Diff->isNonConstantNegative()) {
      PtrSCEVLowest = PtrSCEV;
      continue;
    }
    const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEV);
    if (isa<SCEVCouldNotCompute>(Diff1))
-      return std::nullopt;
+      return false;
    if (Diff1->isNonConstantNegative()) {
      PtrSCEVHighest = PtrSCEV;
      continue;
    }
@@ -5777,7 +5777,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
   // Dist = PtrSCEVHighest - PtrSCEVLowest;
   const SCEV *Dist = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEVLowest);
   if (isa<SCEVCouldNotCompute>(Dist))
-    return std::nullopt;
+    return false;
   int Size = DL.getTypeStoreSize(ElemTy);
   auto TryGetStride = [&](const SCEV *Dist,
                           const SCEV *Multiplier) -> const SCEV * {
@@ -5798,10 +5798,10 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
     const SCEV *Sz = SE.getConstant(Dist->getType(), Size * (SCEVs.size() - 1));
     Stride = TryGetStride(Dist, Sz);
     if (!Stride)
-      return std::nullopt;
+      return false;
   }
   if (!Stride || isa<SCEVConstant>(Stride))
-    return std::nullopt;
+    return false;
   // Iterate through all pointers and check if all distances are
   // unique multiple of Stride.
   using DistOrdPair = std::pair<int64_t, int>;
@@ -5815,28 +5815,28 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
       const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
       const SCEV *Coeff = TryGetStride(Diff, Stride);
       if (!Coeff)
-        return std::nullopt;
+        return false;
       const auto *SC = dyn_cast<SCEVConstant>(Coeff);
       if (!SC || isa<SCEVCouldNotCompute>(SC))
-        return std::nullopt;
+        return false;
       if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest,
                                                   SE.getMulExpr(Stride, SC)))
               ->isZero())
-        return std::nullopt;
+        return false;
       Dist = SC->getAPInt().getZExtValue();
     }
     // If the strides are not the same or repeated, we can't vectorize.
     if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
-      return std::nullopt;
+      return false;
     auto Res = Offsets.emplace(Dist, Cnt);
     if (!Res.second)
-      return std::nullopt;
+      return false;
     // Consecutive order if the inserted element is the last one.
     IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
     ++Cnt;
   }
   if (Offsets.size() != SCEVs.size())
-    return std::nullopt;
+    return false;
   SortedIndices.clear();
   if (!IsConsecutive) {
     // Fill SortedIndices array only if it is non-consecutive.
@@ -5847,10 +5847,9 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
       ++Cnt;
     }
   }
-  if (!Inst)
-    return nullptr;
-  SCEVExpander Expander(SE, DL, "strided-load-vec");
-  return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
+  if (StrideSCEV)
+    *StrideSCEV = Stride;
+  return true;
 }
 
 static std::pair<InstructionCost, InstructionCost>
@@ -6246,11 +6245,10 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   return false;
 }
 
-BoUpSLP::LoadsState
-BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
-                           SmallVectorImpl<unsigned> &Order,
-                           SmallVectorImpl<Value *> &PointerOps,
-                           unsigned *BestVF, bool TryRecursiveCheck) const {
+BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
+    ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
+    SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV,
+    unsigned *BestVF, bool TryRecursiveCheck) const {
   // Check that a vectorized load would load the same memory as a scalar
   // load. For example, we don't want to vectorize loads that are smaller
   // than 8-bit. Even though we have a packed struct {<i2, i2>} LLVM
@@ -6289,7 +6287,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   if (!IsSorted) {
     if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
       if (TTI->isLegalStridedLoadStore(VecTy, CommonAlignment) &&
-          calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order))
+          calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order, StrideSCEV))
         return LoadsState::StridedVectorize;
     }
 
@@ -6418,9 +6416,9 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
         ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
         SmallVector<unsigned> Order;
         SmallVector<Value *> PointerOps;
-        LoadsState LS =
-            canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, BestVF,
-                              /*TryRecursiveCheck=*/false);
+        LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order,
+                                          PointerOps, StrideSCEV, BestVF,
+                                          /*TryRecursiveCheck=*/false);
         // Check that the sorted loads are consecutive.
         if (LS == LoadsState::Gather) {
          if (BestVF) {
@@ -8565,8 +8563,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
           // Try to build vector load.
           ArrayRef<Value *> Values(
               reinterpret_cast<Value *const *>(Slice.begin()), Slice.size());
-          LoadsState LS = canVectorizeLoads(Values, Slice.front(), CurrentOrder,
-                                            PointerOps, &BestVF);
+          LoadsState LS =
+              canVectorizeLoads(Values, Slice.front(), CurrentOrder, PointerOps,
+                                /*StrideSCEV=*/nullptr, &BestVF);
           if (LS != LoadsState::Gather ||
               (BestVF > 1 && static_cast<unsigned>(NumElts) == 2 * BestVF)) {
             if (LS == LoadsState::ScatterVectorize) {
@@ -9171,7 +9170,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
 BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     const InstructionsState &S, ArrayRef<Value *> VL,
     bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
-    SmallVectorImpl<Value *> &PointerOps) {
+    SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV) {
   assert(S.getMainOp() &&
          "Expected instructions with same/alternate opcodes only.");
 
@@ -9273,7 +9272,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
       });
     });
   };
-  switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
+  switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps, StrideSCEV)) {
   case LoadsState::Vectorize:
     return TreeEntry::Vectorize;
   case LoadsState::CompressVectorize:
@@ -10710,8 +10709,9 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
         UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
     OrdersType CurrentOrder;
     SmallVector<Value *> PointerOps;
+    const SCEV *StrideSCEV = nullptr;
     TreeEntry::EntryState State = getScalarsVectorizationState(
-        S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
+        S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps, &StrideSCEV);
     if (State == TreeEntry::NeedToGather) {
       newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
       return;
@@ -10871,6 +10871,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
       // Vectorizing non-consecutive loads with `llvm.masked.gather`.
       TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
                         UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
+      TE->StrideSCEV = StrideSCEV;
       LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (strided LoadInst).\n";
                  TE->dump());
       break;
@@ -18711,16 +18712,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
               ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
                                              DL->getTypeAllocSize(ScalarTy));
         } else {
-          SmallVector<Value *> PointerOps(E->Scalars.size(), nullptr);
-          transform(E->Scalars, PointerOps.begin(), [](Value *V) {
-            return cast<LoadInst>(V)->getPointerOperand();
-          });
-          OrdersType Order;
-          std::optional<Value *> Stride =
-              calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order,
-                                &*Builder.GetInsertPoint());
+          const SCEV *StrideSCEV = E->StrideSCEV;
+          assert(StrideSCEV);
+          SCEVExpander Expander(*SE, *DL, "strided-load-vec");
+          Value *Stride = Expander.expandCodeFor(
+              StrideSCEV, StrideSCEV->getType(), &*Builder.GetInsertPoint());
           Value *NewStride =
-              Builder.CreateIntCast(*Stride, StrideTy, /*isSigned=*/true);
+              Builder.CreateIntCast(Stride, StrideTy, /*isSigned=*/true);
           StrideVal = Builder.CreateMul(
               NewStride, ConstantInt::get(
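
[Note, not part of the patch] The change adopts an "analyze once, expand later" split: the legality check records the stride as a SCEV on the TreeEntry, and only codegen expands it to IR with SCEVExpander. Below is a minimal standalone sketch of that pattern for context; StridedLoadPlan, analyzeStride, and materializeStride are hypothetical names for illustration, not SLPVectorizer APIs.

#include <cassert>

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

using namespace llvm;

// Cached result of the legality phase; no IR has been emitted yet.
struct StridedLoadPlan {
  const SCEV *StrideSCEV = nullptr;
};

// Legality phase: compute the distance between two pointers symbolically
// and cache it. Returns false if ScalarEvolution cannot reason about the
// pointers; a negative answer costs no IR.
static bool analyzeStride(ScalarEvolution &SE, Value *PtrA, Value *PtrB,
                          StridedLoadPlan &Plan) {
  const SCEV *Diff = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
  if (isa<SCEVCouldNotCompute>(Diff))
    return false;
  Plan.StrideSCEV = Diff;
  return true;
}

// Codegen phase: expand the cached SCEV into IR exactly once, right before
// the instruction that consumes the stride value.
static Value *materializeStride(ScalarEvolution &SE, const DataLayout &DL,
                                const StridedLoadPlan &Plan,
                                Instruction *InsertBefore) {
  assert(Plan.StrideSCEV && "analyzeStride must have succeeded");
  SCEVExpander Expander(SE, DL, "strided-load-vec");
  return Expander.expandCodeFor(Plan.StrideSCEV, Plan.StrideSCEV->getType(),
                                InsertBefore);
}

The point of the split is that the SCEV stride computation done during the legality check is not repeated at code generation time; only the expansion to IR remains there, which is exactly what caching `StrideSCEV` on `TreeEntry` buys.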