
Commit f80c0ba

Revert "Reapply "[VPlan] Compute cost of more replicating loads/stores in ::computeCost. (#160053)" (#161724)"

This reverts commit 8f2466b to fix crashes reported in commits.

1 parent: 94eade6
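The substance of the revert, spread across the four diffs below: VPCostContext no longer carries a ScalarEvolution reference, VPCostContext::getScalarizationOverhead loses its AlwaysIncludeReplicatingR parameter, and VPReplicateRecipe::computeCost goes back to pricing only single-scalar loads and stores directly. A simplified before/after view of the call-site pattern repeated throughout LoopVectorize.cpp (argument lists copied from the hunks below):

    // Reverted code: the context captured ScalarEvolution so that
    // replicated memory-op costing could issue SCEV-based address queries.
    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
                          *CM.PSE.getSE());

    // After the revert: the ScalarEvolution argument is gone again.
    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);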

4 files changed (+29, -134 lines)


llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 10 deletions
@@ -3903,8 +3903,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
     if (VF.isScalar())
       continue;
 
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
     precomputeCosts(*Plan, VF, CostCtx);
     auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4161,8 +4160,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
 
     // Add on other costs that are modelled in VPlan, but not in the legacy
     // cost model.
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind);
     VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
     assert(VectorRegion && "Expected to have a vector region!");
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6854,7 +6852,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
 
 InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
                                                ElementCount VF) const {
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
   InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
 
   // Now compute and add the VPlan-based cost.
@@ -7087,8 +7085,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   // simplifications not accounted for in the legacy cost model. If that's the
   // case, don't trigger the assertion, as the extra simplifications may cause a
   // different VF to be picked by the VPlan-based cost model.
-  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
-                        *CM.PSE.getSE());
+  VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind);
   precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
   // Verify that the VPlan-based and legacy cost models agree, except for VPlans
   // with early exits and plans with additional VPlan simplifications. The
@@ -8624,8 +8621,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // TODO: Enable following transform when the EVL-version of extended-reduction
   // and mulacc-reduction are implemented.
   if (!CM.foldTailWithEVL()) {
-    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
-                          *CM.PSE.getSE());
+    VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
     VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
                              CostCtx, Range);
   }
@@ -10079,7 +10075,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     bool ForceVectorization =
         Hints.getForce() == LoopVectorizeHints::FK_Enabled;
     VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
-                          CM.CostKind, *CM.PSE.getSE());
+                          CM.CostKind);
     if (!ForceVectorization &&
         !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
                                      LVP.getPlanFor(VF.Width), SEL,

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 2 additions & 7 deletions
@@ -1772,8 +1772,7 @@ VPCostContext::getOperandInfo(VPValue *V) const {
 }
 
 InstructionCost VPCostContext::getScalarizationOverhead(
-    Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
-    bool AlwaysIncludeReplicatingR) {
+    Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF) {
   if (VF.isScalar())
     return 0;
 
@@ -1793,11 +1792,7 @@ InstructionCost VPCostContext::getScalarizationOverhead(
   SmallPtrSet<const VPValue *, 4> UniqueOperands;
   SmallVector<Type *> Tys;
   for (auto *Op : Operands) {
-    if (Op->isLiveIn() ||
-        (!AlwaysIncludeReplicatingR &&
-         isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op)) ||
-        (isa<VPReplicateRecipe>(Op) &&
-         cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
+    if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
         !UniqueOperands.insert(Op).second)
       continue;
     Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
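With the AlwaysIncludeReplicatingR escape hatch removed, the restored filter only charges extract overhead for operands that actually live in a vector: live-ins and values defined by VPReplicateRecipe or VPPredInstPHIRecipe are produced per lane already, so they are skipped. A minimal caller sketch against the restored three-argument signature; the surrounding recipe and the use of its own operands here are illustrative, not code from this patch:

    // Hypothetical caller inside a recipe's computeCost: gather operands
    // and ask the context for the insert/extract overhead at this VF.
    SmallVector<const VPValue *> Ops(operands().begin(), operands().end());
    InstructionCost Overhead = Ctx.getScalarizationOverhead(
        Ctx.Types.inferScalarType(this), Ops, VF);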

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 6 additions & 10 deletions
@@ -349,14 +349,12 @@ struct VPCostContext {
   LoopVectorizationCostModel &CM;
   SmallPtrSet<Instruction *, 8> SkipCostComputation;
   TargetTransformInfo::TargetCostKind CostKind;
-  ScalarEvolution &SE;
 
   VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
                 const VPlan &Plan, LoopVectorizationCostModel &CM,
-                TargetTransformInfo::TargetCostKind CostKind,
-                ScalarEvolution &SE)
+                TargetTransformInfo::TargetCostKind CostKind)
       : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
-        CostKind(CostKind), SE(SE) {}
+        CostKind(CostKind) {}
 
   /// Return the cost for \p UI with \p VF using the legacy cost model as
   /// fallback until computing the cost of all recipes migrates to VPlan.
@@ -376,12 +374,10 @@ struct VPCostContext {
 
   /// Estimate the overhead of scalarizing a recipe with result type \p ResultTy
   /// and \p Operands with \p VF. This is a convenience wrapper for the
-  /// type-based getScalarizationOverhead API. If \p AlwaysIncludeReplicatingR
-  /// is true, always compute the cost of scalarizing replicating operands.
-  InstructionCost
-  getScalarizationOverhead(Type *ResultTy, ArrayRef<const VPValue *> Operands,
-                           ElementCount VF,
-                           bool AlwaysIncludeReplicatingR = false);
+  /// type-based getScalarizationOverhead API.
+  InstructionCost getScalarizationOverhead(Type *ResultTy,
+                                           ArrayRef<const VPValue *> Operands,
+                                           ElementCount VF);
 };
 
 /// This class can be used to assign names to VPValues. For VPValues without
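After this hunk VPCostContext is again a plain bundle of TTI, TLI, type inference, the legacy cost model, and a cost kind, with no dependency on ScalarEvolution. A hypothetical construction mirroring the LoopVectorize.cpp call sites above; the TCK_RecipThroughput cost kind is an assumption for illustration, the real call sites pass CM.CostKind:

    // Sketch: building a cost context for a plan (fields as declared above).
    VPCostContext Ctx(TTI, TLI, Plan, CM,
                      TargetTransformInfo::TCK_RecipThroughput);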

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 15 additions & 107 deletions
@@ -40,7 +40,6 @@
 #include <cassert>
 
 using namespace llvm;
-using namespace llvm::VPlanPatternMatch;
 
 using VectorParts = SmallVector<Value *, 2>;
 
@@ -304,6 +303,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
   VPRecipeBase *OpR = Op->getDefiningRecipe();
 
   // If the partial reduction is predicated, a select will be operand 0
+  using namespace llvm::VPlanPatternMatch;
   if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
     OpR = Op->getDefiningRecipe();
   }
@@ -1963,6 +1963,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
   Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 
   VPValue *Op0, *Op1;
+  using namespace llvm::VPlanPatternMatch;
   if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
       (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
        match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
@@ -3110,62 +3111,6 @@ bool VPReplicateRecipe::shouldPack() const {
   });
 }
 
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
-  auto *PtrR = Ptr->getDefiningRecipe();
-  if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
-                  cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
-                      Instruction::GetElementPtr) ||
-                 isa<VPWidenGEPRecipe>(PtrR) ||
-                 match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
-    return false;
-
-  // We are looking for a GEP where all indices are either loop invariant or
-  // inductions.
-  for (VPValue *Opd : drop_begin(PtrR->operands())) {
-    if (!Opd->isDefinedOutsideLoopRegions() &&
-        !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
-      return false;
-  }
-
-  return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
-  SmallPtrSet<const VPUser *, 4> Seen;
-  SmallVector<const VPUser *> WorkList = {V};
-
-  while (!WorkList.empty()) {
-    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
-      continue;
-
-    for (VPUser *U : Cur->users()) {
-      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
-        if (InterleaveR->getAddr() == Cur)
-          return true;
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
-        if (RepR->getOpcode() == Instruction::Load &&
-            RepR->getOperand(0) == Cur)
-          return true;
-        if (RepR->getOpcode() == Instruction::Store &&
-            RepR->getOperand(1) == Cur)
-          return true;
-      }
-      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
-        if (MemR->getAddr() == Cur && MemR->isConsecutive())
-          return true;
-      }
-    }
-
-    append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
-  }
-  return false;
-}
-
 InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
   Instruction *UI = cast<Instruction>(getUnderlyingValue());
@@ -3273,58 +3218,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
+    if (isSingleScalar()) {
+      bool IsLoad = UI->getOpcode() == Instruction::Load;
+      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+      const Align Alignment = getLoadStoreAlignment(UI);
+      unsigned AS = getLoadStoreAddressSpace(UI);
+      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+    }
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
-    const VPRegionBlock *ParentRegion = getParent()->getParent();
-    if (ParentRegion && ParentRegion->isReplicator())
-      break;
-
-    bool IsLoad = UI->getOpcode() == Instruction::Load;
-    const VPValue *PtrOp = getOperand(!IsLoad);
-    // TODO: Handle cases where we need to pass a SCEV to
-    // getAddressComputationCost.
-    if (shouldUseAddressAccessSCEV(PtrOp))
-      break;
-
-    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
-    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
-    const Align Alignment = getLoadStoreAlignment(UI);
-    unsigned AS = getLoadStoreAddressSpace(UI);
-    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
-    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
-        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
-    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
-    InstructionCost ScalarCost =
-        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
-                              PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
-    if (isSingleScalar())
-      return ScalarCost;
-
-    SmallVector<const VPValue *> OpsToScalarize;
-    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
-    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
-    // don't assign scalarization overhead in general, if the target prefers
-    // vectorized addressing or the loaded value is used as part of an address
-    // of another load or store.
-    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
-    if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
-      bool EfficientVectorLoadStore =
-          Ctx.TTI.supportsEfficientVectorElementLoadStore();
-      if (!(IsLoad && !PreferVectorizedAddressing) &&
-          !(!IsLoad && EfficientVectorLoadStore))
-        append_range(OpsToScalarize, operands());
-
-      if (!EfficientVectorLoadStore)
-        ResultTy = Ctx.Types.inferScalarType(this);
-    }
-
-    return (ScalarCost * VF.getFixedValue()) +
-           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+    break;
   }
   }
 
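The behavioral consequence of the final hunk: scalable-VF replicated loads and stores no longer return an invalid cost here, and any load or store that is not single-scalar takes the restored break out of the switch. Recipes that break out are costed by the legacy-model fallback that the VPlanHelpers.h doc comment above describes ("using the legacy cost model as fallback"); the tail of computeCost is outside this diff, but presumably it still ends along these lines:

    // Sketch of the fallback (not part of this diff): anything the switch
    // does not price directly defers to the legacy cost model for now.
    return Ctx.getLegacyCost(UI, VF);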
