 #include <cassert>
 
 using namespace llvm;
-using namespace llvm::VPlanPatternMatch;
 
 using VectorParts = SmallVector<Value *, 2>;
 
@@ -304,6 +303,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
   VPRecipeBase *OpR = Op->getDefiningRecipe();
 
   // If the partial reduction is predicated, a select will be operand 0
+  using namespace llvm::VPlanPatternMatch;
   if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
     OpR = Op->getDefiningRecipe();
   }
@@ -1963,6 +1963,7 @@ InstructionCost VPWidenSelectRecipe::computeCost(ElementCount VF,
   Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 
   VPValue *Op0, *Op1;
+  using namespace llvm::VPlanPatternMatch;
   if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
       (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
        match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
@@ -3110,62 +3111,6 @@ bool VPReplicateRecipe::shouldPack() const {
   });
 }
 
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
-  auto *PtrR = Ptr->getDefiningRecipe();
-  if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
-                  cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
-                      Instruction::GetElementPtr) ||
-                 isa<VPWidenGEPRecipe>(PtrR) ||
-                 match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
-    return false;
-
-  // We are looking for a GEP where all indices are either loop invariant or
-  // inductions.
-  for (VPValue *Opd : drop_begin(PtrR->operands())) {
-    if (!Opd->isDefinedOutsideLoopRegions() &&
-        !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
-      return false;
-  }
-
-  return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
-  SmallPtrSet<const VPUser *, 4> Seen;
-  SmallVector<const VPUser *> WorkList = {V};
-
-  while (!WorkList.empty()) {
-    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
-      continue;
-
-    for (VPUser *U : Cur->users()) {
-      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
-        if (InterleaveR->getAddr() == Cur)
-          return true;
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
-        if (RepR->getOpcode() == Instruction::Load &&
-            RepR->getOperand(0) == Cur)
-          return true;
-        if (RepR->getOpcode() == Instruction::Store &&
-            RepR->getOperand(1) == Cur)
-          return true;
-      }
-      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
-        if (MemR->getAddr() == Cur && MemR->isConsecutive())
-          return true;
-      }
-    }
-
-    append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
-  }
-  return false;
-}
-
 InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
   Instruction *UI = cast<Instruction>(getUnderlyingValue());
@@ -3273,58 +3218,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
+    if (isSingleScalar()) {
+      bool IsLoad = UI->getOpcode() == Instruction::Load;
+      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+      const Align Alignment = getLoadStoreAlignment(UI);
+      unsigned AS = getLoadStoreAddressSpace(UI);
+      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+    }
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
-    const VPRegionBlock *ParentRegion = getParent()->getParent();
-    if (ParentRegion && ParentRegion->isReplicator())
-      break;
-
-    bool IsLoad = UI->getOpcode() == Instruction::Load;
-    const VPValue *PtrOp = getOperand(!IsLoad);
-    // TODO: Handle cases where we need to pass a SCEV to
-    // getAddressComputationCost.
-    if (shouldUseAddressAccessSCEV(PtrOp))
-      break;
-
-    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
-    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
-    const Align Alignment = getLoadStoreAlignment(UI);
-    unsigned AS = getLoadStoreAddressSpace(UI);
-    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
-    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
-        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
-    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
-    InstructionCost ScalarCost =
-        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
-                              PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
-    if (isSingleScalar())
-      return ScalarCost;
-
-    SmallVector<const VPValue *> OpsToScalarize;
-    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
-    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
-    // don't assign scalarization overhead in general, if the target prefers
-    // vectorized addressing or the loaded value is used as part of an address
-    // of another load or store.
-    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
-    if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
-      bool EfficientVectorLoadStore =
-          Ctx.TTI.supportsEfficientVectorElementLoadStore();
-      if (!(IsLoad && !PreferVectorizedAddressing) &&
-          !(!IsLoad && EfficientVectorLoadStore))
-        append_range(OpsToScalarize, operands());
-
-      if (!EfficientVectorLoadStore)
-        ResultTy = Ctx.Types.inferScalarType(this);
-    }
-
-    return (ScalarCost * VF.getFixedValue()) +
-           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+    break;
   }
   }
 