@@ -578,8 +578,10 @@ class InnerLoopVectorizer {
578
578
// / The profitability analysis.
579
579
LoopVectorizationCostModel *Cost;
580
580
581
- // / BFI and PSI are used to check for profile guided size optimizations.
581
+ // / Used to calculate the probability of predicated blocks in
582
+ // / getPredBlockCostDivisor.
582
583
BlockFrequencyInfo *BFI;
584
+ // / Used to check for profile guided size optimizations.
583
585
ProfileSummaryInfo *PSI;
584
586
585
587
// / Structure to hold information about generated runtime checks, responsible
@@ -900,7 +902,7 @@ class LoopVectorizationCostModel {
900
902
InterleavedAccessInfo &IAI,
901
903
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
902
904
: ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
903
- TTI (TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
905
+ TTI (TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), BFI(BFI), TheFunction(F),
904
906
Hints(Hints), InterleaveInfo(IAI) {
905
907
if (TTI.supportsScalableVectors () || ForceTargetSupportsScalableVectors)
906
908
initializeVScaleForTuning ();
@@ -1249,6 +1251,17 @@ class LoopVectorizationCostModel {
1249
1251
// / Superset of instructions that return true for isScalarWithPredication.
1250
1252
bool isPredicatedInst (Instruction *I) const ;
1251
1253
1254
+ // / A helper function that returns how much we should divide the cost of a
1255
+ // / predicated block by. Typically this is the reciprocal of the block
1256
+ // / probability, i.e. if we return X we are assuming the predicated block will
1257
+ // / execute once for every X iterations of the loop header so the block should
1258
+ // / only contribute 1/X of its cost to the total cost calculation, but when
1259
+ // / optimizing for code size it will just be 1 as code size costs don't depend
1260
+ // / on execution probabilities.
1261
+ inline unsigned
1262
+ getPredBlockCostDivisor (TargetTransformInfo::TargetCostKind CostKind,
1263
+ const BasicBlock *BB) const ;
1264
+
1252
1265
// / Return the costs for our two available strategies for lowering a
1253
1266
// / div/rem operation which requires speculating at least one lane.
1254
1267
// / First result is for scalarization (will be invalid for scalable
@@ -1711,6 +1724,8 @@ class LoopVectorizationCostModel {
1711
1724
// / Interface to emit optimization remarks.
1712
1725
OptimizationRemarkEmitter *ORE;
1713
1726
1727
+ const BlockFrequencyInfo *BFI;
1728
+
1714
1729
const Function *TheFunction;
1715
1730
1716
1731
// / Loop Vectorize Hint.
@@ -2866,6 +2881,19 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
2866
2881
}
2867
2882
}
2868
2883
2884
+ unsigned LoopVectorizationCostModel::getPredBlockCostDivisor (
2885
+ TargetTransformInfo::TargetCostKind CostKind, const BasicBlock *BB) const {
2886
+ if (CostKind == TTI::TCK_CodeSize)
2887
+ return 1 ;
2888
+
2889
+ uint64_t HeaderFreq = BFI->getBlockFreq (TheLoop->getHeader ()).getFrequency ();
2890
+ uint64_t BBFreq = BFI->getBlockFreq (BB).getFrequency ();
2891
+ assert (HeaderFreq >= BBFreq &&
2892
+ " Header has smaller block freq than dominated BB?" );
2893
+ return BFI->getBlockFreq (TheLoop->getHeader ()).getFrequency () /
2894
+ BFI->getBlockFreq (BB).getFrequency ();
2895
+ }
2896
+
2869
2897
std::pair<InstructionCost, InstructionCost>
2870
2898
LoopVectorizationCostModel::getDivRemSpeculationCost (Instruction *I,
2871
2899
ElementCount VF) const {
@@ -2902,7 +2930,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
2902
2930
// Scale the cost by the probability of executing the predicated blocks.
2903
2931
// This assumes the predicated block for each vector lane is equally
2904
2932
// likely.
2905
- ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor (CostKind);
2933
+ ScalarizationCost =
2934
+ ScalarizationCost / getPredBlockCostDivisor (CostKind, I->getParent ());
2906
2935
}
2907
2936
2908
2937
InstructionCost SafeDivisorCost = 0 ;
@@ -5035,7 +5064,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5035
5064
}
5036
5065
5037
5066
// Scale the total scalar cost by block probability.
5038
- ScalarCost /= getPredBlockCostDivisor (CostKind);
5067
+ ScalarCost /= getPredBlockCostDivisor (CostKind, PredInst-> getParent () );
5039
5068
5040
5069
// Compute the discount. A non-negative discount means the vector version
5041
5070
// of the instruction costs more, and scalarizing would be beneficial.
@@ -5088,7 +5117,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
5088
5117
// cost by the probability of executing it. blockNeedsPredication from
5089
5118
// Legal is used so as to not include all blocks in tail folded loops.
5090
5119
if (VF.isScalar () && Legal->blockNeedsPredication (BB))
5091
- BlockCost /= getPredBlockCostDivisor (CostKind);
5120
+ BlockCost /= getPredBlockCostDivisor (CostKind, BB );
5092
5121
5093
5122
Cost += BlockCost;
5094
5123
}
@@ -5167,7 +5196,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
5167
5196
// conditional branches, but may not be executed for each vector lane. Scale
5168
5197
// the cost by the probability of executing the predicated block.
5169
5198
if (isPredicatedInst (I)) {
5170
- Cost /= getPredBlockCostDivisor (CostKind);
5199
+ Cost /= getPredBlockCostDivisor (CostKind, I-> getParent () );
5171
5200
5172
5201
// Add the cost of an i1 extract and a branch
5173
5202
auto *VecI1Ty =
@@ -6727,6 +6756,11 @@ bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
6727
6756
SkipCostComputation.contains (UI);
6728
6757
}
6729
6758
6759
+ unsigned VPCostContext::getPredBlockCostDivisor (
6760
+ TargetTransformInfo::TargetCostKind CostKind, const BasicBlock *BB) const {
6761
+ return CM.getPredBlockCostDivisor (CostKind, BB);
6762
+ }
6763
+
6730
6764
InstructionCost
6731
6765
LoopVectorizationPlanner::precomputeCosts (VPlan &Plan, ElementCount VF,
6732
6766
VPCostContext &CostCtx) const {
@@ -10310,9 +10344,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
10310
10344
10311
10345
auto &MAMProxy = AM.getResult <ModuleAnalysisManagerFunctionProxy>(F);
10312
10346
PSI = MAMProxy.getCachedResult <ProfileSummaryAnalysis>(*F.getParent ());
10313
- BFI = nullptr ;
10314
- if (PSI && PSI->hasProfileSummary ())
10315
- BFI = &AM.getResult <BlockFrequencyAnalysis>(F);
10347
+ BFI = &AM.getResult <BlockFrequencyAnalysis>(F);
10316
10348
LoopVectorizeResult Result = runImpl (F);
10317
10349
if (!Result.MadeAnyChange )
10318
10350
return PreservedAnalyses::all ();
0 commit comments