diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 405d4a742f37b..ecbd0ef7df5e5 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -251,18 +251,15 @@ struct HistogramInfo { /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, - DominatorTree *DT, TargetTransformInfo *TTI, - TargetLibraryInfo *TLI, Function *F, - LoopAccessInfoManager &LAIs, LoopInfo *LI, - OptimizationRemarkEmitter *ORE, - LoopVectorizationRequirements *R, - LoopVectorizeHints *H, DemandedBits *DB, - AssumptionCache *AC, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, AAResults *AA) + LoopVectorizationLegality( + Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, + TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F, + LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE, + LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, + AssumptionCache *AC, bool AllowRuntimeSCEVChecks, AAResults *AA) : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs), - ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI), - AA(AA) {} + ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), + AllowRuntimeSCEVChecks(AllowRuntimeSCEVChecks), AA(AA) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -720,9 +717,8 @@ class LoopVectorizationLegality { /// Hold potentially faulting loads. SmallPtrSet PotentiallyFaultingLoads; - /// BFI and PSI are used to check for profile guided size optimizations. - BlockFrequencyInfo *BFI; - ProfileSummaryInfo *PSI; + /// Whether or not creating SCEV predicates is allowed. + bool AllowRuntimeSCEVChecks; // Alias Analysis results used to check for possible aliasing with loads // used in uncountable exit conditions. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 03112c67dda7b..86e742ca5fec1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -460,10 +460,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy, const auto &Strides = LAI ? LAI->getSymbolicStrides() : DenseMap(); - bool CanAddPredicate = !llvm::shouldOptimizeForSize( - TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass); int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides, - CanAddPredicate, false) + AllowRuntimeSCEVChecks, false) .value_or(0); if (Stride == 1 || Stride == -1) return Stride; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cbfbc29360b0b..27c20b4ae8356 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -501,12 +501,11 @@ class InnerLoopVectorizer { LoopInfo *LI, DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC, ElementCount VecWidth, unsigned UnrollFactor, - LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks, - VPlan &Plan) + LoopVectorizationCostModel *CM, + GeneratedRTChecks &RTChecks, VPlan &Plan) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), - Cost(CM), BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan), + Cost(CM), RTChecks(RTChecks), Plan(Plan), VectorPHVPBB(cast( Plan.getVectorLoopRegion()->getSinglePredecessor())) {} @@ -583,10 +582,6 @@ class InnerLoopVectorizer { /// The profitablity analysis. LoopVectorizationCostModel *Cost; - /// BFI and PSI are used to check for profile guided size optimizations. - BlockFrequencyInfo *BFI; - ProfileSummaryInfo *PSI; - /// Structure to hold information about generated runtime checks, responsible /// for cleaning the checks, if vectorization turns out unprofitable. GeneratedRTChecks &RTChecks; @@ -635,11 +630,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer { Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth, ElementCount MinProfitableTripCount, unsigned UnrollFactor) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth, - UnrollFactor, CM, BFI, PSI, Checks, Plan), + UnrollFactor, CM, Checks, Plan), EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {} /// Holds and updates state information required to vectorize the main loop @@ -665,10 +659,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { AssumptionCache *AC, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, GeneratedRTChecks &Check, VPlan &Plan) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM, - BFI, PSI, Check, Plan, EPI.MainLoopVF, + Check, Plan, EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF) {} /// Implements the interface for creating a vectorized skeleton using the /// *main loop* strategy (i.e., the first pass of VPlan execution). @@ -698,14 +691,15 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { // their epilogues. class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer { public: - EpilogueVectorizerEpilogueLoop( - Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI, - DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC, - EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Checks, VPlan &Plan) + EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetTransformInfo *TTI, + AssumptionCache *AC, + EpilogueLoopVectorizationInfo &EPI, + LoopVectorizationCostModel *CM, + GeneratedRTChecks &Checks, VPlan &Plan) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM, - BFI, PSI, Checks, Plan, EPI.EpilogueVF, + Checks, Plan, EPI.EpilogueVF, EPI.EpilogueVF, EPI.EpilogueUF) {} /// Implements the interface for creating a vectorized skeleton using the /// *epilogue loop* strategy (i.e., the second pass of VPlan execution). @@ -881,18 +875,13 @@ class LoopVectorizationCostModel { AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, const LoopVectorizeHints *Hints, - InterleavedAccessInfo &IAI, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) + InterleavedAccessInfo &IAI, bool OptForSize) : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F), - Hints(Hints), InterleaveInfo(IAI) { + Hints(Hints), InterleaveInfo(IAI), OptForSize(OptForSize) { if (TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors) initializeVScaleForTuning(); CostKind = F->hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput; - // Query this against the original loop and save it here because the profile - // of the original loop header may change as the transformation happens. - OptForSize = llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass); } /// \return An upper bound for the vectorization factors (both fixed and @@ -9048,20 +9037,13 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { // predication, and 4) a TTI hook that analyses whether the loop is suitable // for predication. static ScalarEpilogueLowering getScalarEpilogueLowering( - Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, + Function *F, Loop *L, LoopVectorizeHints &Hints, bool OptForSize, + TargetTransformInfo *TTI, TargetLibraryInfo *TLI, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) { // 1) OptSize takes precedence over all other options, i.e. if this is set, // don't look at hints or options, and don't request a scalar epilogue. - // (For PGSO, as shouldOptimizeForSize isn't currently accessible from - // LoopAccessInfo (due to code dependency and not being able to reliably get - // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection - // of strides in LoopAccessInfo::analyzeLoop() and vectorize without - // versioning when the vectorization is forced, unlike hasOptSize. So revert - // back to the old way and vectorize with versioning when forced. See D81345.) - if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass) && - Hints.getForce() != LoopVectorizeHints::FK_Enabled)) + if (F->hasOptSize() || + (OptForSize && Hints.getForce() != LoopVectorizeHints::FK_Enabled)) return CM_ScalarEpilogueNotAllowedOptSize; // 2) If set, obey the directives @@ -9100,8 +9082,7 @@ static bool processLoopInVPlanNativePath( Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, LoopVectorizationLegality *LVL, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, - OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints, + OptimizationRemarkEmitter *ORE, bool OptForSize, LoopVectorizeHints &Hints, LoopVectorizationRequirements &Requirements) { if (isa(PSE.getBackedgeTakenCount())) { @@ -9113,10 +9094,10 @@ static bool processLoopInVPlanNativePath( InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); ScalarEpilogueLowering SEL = - getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, *LVL, &IAI); + getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, *LVL, &IAI); LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, - &Hints, IAI, PSI, BFI); + &Hints, IAI, OptForSize); // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. @@ -9142,7 +9123,7 @@ static bool processLoopInVPlanNativePath( { GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind); InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM, - BFI, PSI, Checks, BestPlan); + Checks, BestPlan); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1, @@ -9803,10 +9784,16 @@ bool LoopVectorizePass::processLoop(Loop *L) { PredicatedScalarEvolution PSE(*SE, *L); + // Query this against the original loop and save it here because the profile + // of the original loop header may change as the transformation happens. + bool OptForSize = llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass); + // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements; LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE, - &Requirements, &Hints, DB, AC, BFI, PSI, AA); + &Requirements, &Hints, DB, AC, + /*AllowRuntimeSCEVChecks=*/!OptForSize, AA); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); Hints.emitRemarkWithHints(); @@ -9834,7 +9821,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // pipeline. if (!L->isInnermost()) return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC, - ORE, BFI, PSI, Hints, Requirements); + ORE, OptForSize, Hints, Requirements); assert(L->isInnermost() && "Inner loop expected."); @@ -9864,7 +9851,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check the function attributes and profiles to find out if this function // should be optimized for size. ScalarEpilogueLowering SEL = - getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, LVL, &IAI); + getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, LVL, &IAI); // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads. @@ -9937,7 +9924,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Use the cost model. LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, - F, &Hints, IAI, PSI, BFI); + F, &Hints, IAI, OptForSize); // Use the planner for vectorization. LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints, ORE); @@ -10139,8 +10126,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan); EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1, BestEpiPlan); - EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI, - PSI, Checks, *BestMainPlan); + EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, + Checks, *BestMainPlan); auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; @@ -10148,7 +10135,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Second pass vectorizes the epilogue and adjusts the control flow // edges from the first pass. EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, - BFI, PSI, Checks, BestEpiPlan); + Checks, BestEpiPlan); SmallVector InstsToMove = preparePlanForEpilogueVectorLoop( BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE()); LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT, @@ -10157,8 +10144,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { Checks, InstsToMove); ++LoopsEpilogueVectorized; } else { - InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI, PSI, - Checks, BestPlan); + InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, Checks, + BestPlan); // TODO: Move to general VPlan pipeline once epilogue loops are also // supported. VPlanTransforms::runPass(