Skip to content

Commit

Permalink
[SLPVectorizer] Migrate getEntryCost to return InstructionCost
Browse files Browse the repository at this point in the history
This patch also changes the return type of getGatherCost and the signature
of the debug function dumpTreeCosts to use InstructionCost.

This patch is part of a series of patches to use InstructionCost instead of
unsigned/int for the cost model functions.

See this thread for context:
http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

See this patch for the introduction of the type:
https://reviews.llvm.org/D91174

Depends on D93049

Differential Revision: https://reviews.llvm.org/D93127
  • Loading branch information
CarolineConcatto committed Dec 16, 2020
1 parent c5771a2 commit be9184b
Showing 1 changed file with 58 additions and 53 deletions.
111 changes: 58 additions & 53 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -79,6 +79,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -1493,7 +1494,7 @@ class BoUpSLP {
bool areAllUsersVectorized(Instruction *I) const;

/// \returns the cost of the vectorizable entry.
int getEntryCost(TreeEntry *E);
InstructionCost getEntryCost(TreeEntry *E);

/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
Expand All @@ -1515,13 +1516,14 @@ class BoUpSLP {

/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
int getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const;
InstructionCost
getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const;

/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
int getGatherCost(ArrayRef<Value *> VL) const;
InstructionCost getGatherCost(ArrayRef<Value *> VL) const;

/// Set the Builder insert point to one after the last instruction in
/// the bundle
Expand Down Expand Up @@ -1755,8 +1757,9 @@ class BoUpSLP {
};

#ifndef NDEBUG
void dumpTreeCosts(TreeEntry *E, int ReuseShuffleCost, int VecCost,
int ScalarCost) const {
void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost,
InstructionCost VecCost,
InstructionCost ScalarCost) const {
dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
dbgs() << "SLP: Costs:\n";
dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
Expand Down Expand Up @@ -3423,7 +3426,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
return {IntrinsicCost, LibCost};
}

int BoUpSLP::getEntryCost(TreeEntry *E) {
InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
ArrayRef<Value*> VL = E->Scalars;

Type *ScalarTy = VL[0]->getType();
Expand All @@ -3442,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {

unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
int ReuseShuffleCost = 0;
InstructionCost ReuseShuffleCost = 0;
if (NeedToShuffleReuses) {
ReuseShuffleCost =
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
Expand All @@ -3458,7 +3461,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
allSameType(VL) && allSameBlock(VL)) {
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
if (ShuffleKind.hasValue()) {
int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
InstructionCost Cost =
TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
for (auto *V : VL) {
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
Expand Down Expand Up @@ -3490,7 +3494,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {

case Instruction::ExtractValue:
case Instruction::ExtractElement: {
int DeadCost = 0;
InstructionCost DeadCost = 0;
if (NeedToShuffleReuses) {
unsigned Idx = 0;
for (unsigned I : E->ReuseShuffleIndices) {
Expand Down Expand Up @@ -3565,18 +3569,18 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
int ScalarEltCost =
InstructionCost ScalarEltCost =
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}

// Calculate the cost of this instruction.
int ScalarCost = VL.size() * ScalarEltCost;
InstructionCost ScalarCost = VL.size() * ScalarEltCost;

auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
int VecCost = 0;
InstructionCost VecCost = 0;
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
VecCost =
Expand All @@ -3591,14 +3595,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::ICmp:
case Instruction::Select: {
// Calculate the cost of this instruction.
int ScalarEltCost =
InstructionCost ScalarEltCost =
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;

// Check if all entries in VL are either compares or selects with compares
// as condition that have the same predicates.
Expand All @@ -3617,16 +3621,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
VecPred = CurrentPred;
}

int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
VecPred, CostKind, VL0);
InstructionCost VecCost = TTI->getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
// Check if it is possible and profitable to use min/max for selects in
// VL.
//
auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
{VecTy, VecTy});
int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
InstructionCost IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
// If the selects are the only uses of the compares, they will be dead
// and we can adjust the cost by removing their cost.
if (IntrinsicAndUse.second)
Expand Down Expand Up @@ -3695,16 +3700,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}

SmallVector<const Value *, 4> Operands(VL0->operand_values());
int ScalarEltCost = TTI->getArithmeticInstrCost(
E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
Operands, VL0);
InstructionCost ScalarEltCost =
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost = TTI->getArithmeticInstrCost(
E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
Operands, VL0);
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost =
TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
Expand All @@ -3714,30 +3719,27 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;

int ScalarEltCost =
TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
Op1VK, Op2VK);
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost =
TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
Op1VK, Op2VK);
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost = TTI->getArithmeticInstrCost(
Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
Align alignment = cast<LoadInst>(VL0)->getAlign();
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
CostKind, VL0);
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Load, ScalarTy, alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
int VecLdCost;
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {
VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
CostKind, VL0);
Expand All @@ -3759,12 +3761,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
auto *SI =
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
Align Alignment = SI->getAlign();
int ScalarEltCost =
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
CostKind, VL0);
int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
VecTy, Alignment, 0, CostKind, VL0);
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
if (IsReorder)
VecStCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
Expand All @@ -3777,14 +3778,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {

// Calculate the cost of the scalar and vector calls.
IntrinsicCostAttributes CostAttrs(ID, *CI, ElementCount::getFixed(1), 1);
int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
InstructionCost ScalarEltCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;

auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
int VecCallCost = std::min(VecCallCosts.first, VecCallCosts.second);
InstructionCost VecCallCost =
std::min(VecCallCosts.first, VecCallCosts.second);

LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
Expand All @@ -3799,7 +3802,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
int ScalarCost = 0;
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
for (unsigned Idx : E->ReuseShuffleIndices) {
Instruction *I = cast<Instruction>(VL[Idx]);
Expand All @@ -3823,7 +3826,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
int VecCost = 0;
InstructionCost VecCost = 0;
if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
Expand Down Expand Up @@ -4120,21 +4123,23 @@ InstructionCost BoUpSLP::getTreeCost() {
return Cost;
}

int BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {
InstructionCost
BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {
unsigned NumElts = Ty->getNumElements();
APInt DemandedElts = APInt::getNullValue(NumElts);
for (unsigned I = 0; I < NumElts; ++I)
if (!ShuffledIndices.count(I))
DemandedElts.setBit(I);
int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
if (!ShuffledIndices.empty())
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}

int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
Expand Down

0 comments on commit be9184b

Please sign in to comment.