111 changes: 83 additions & 28 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,83 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
return true;
}

/// Build cost attributes from an existing intrinsic call \p I.
///
/// Records the instruction itself, its result type and intrinsic ID, the
/// callee's declared parameter types, the call's argument values and, when
/// the call is an FPMathOperator, its fast-math flags.
IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I)
    : II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) {
  // Fast-math flags are only picked up when the call is an FPMathOperator.
  if (const auto *FPOp = dyn_cast<FPMathOperator>(&I))
    FMF = FPOp->getFastMathFlags();

  // Argument values of the call.
  Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end());

  // Parameter types as declared by the called function's signature.
  FunctionType *CalleeTy = I.getCalledFunction()->getFunctionType();
  ParamTys.insert(ParamTys.begin(), CalleeTy->param_begin(),
                  CalleeTy->param_end());
}

/// Build cost attributes for intrinsic \p Id, taking the return type,
/// argument values, parameter types and fast-math flags from call \p CI.
///
/// \param Id     Intrinsic the cost query is about.
/// \param CI     Call supplying types, arguments and fast-math flags.
/// \param Factor Stored as the vectorization factor (VF) of the query.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
                                                 unsigned Factor)
    : RetTy(CI.getType()), IID(Id), VF(Factor) {
  // Fast-math flags are only picked up when the call is an FPMathOperator.
  if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
    FMF = FPMO->getFastMathFlags();

  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());

  // Take the signature from the call site itself. getCalledFunction() returns
  // null for an indirect call, so dereferencing it unchecked would crash.
  FunctionType *FTy = CI.getFunctionType();
  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}

/// Build cost attributes for intrinsic \p Id, taking the return type,
/// argument values, parameter types and fast-math flags from call \p CI, with
/// an explicitly supplied scalarization cost.
///
/// \param Id         Intrinsic the cost query is about.
/// \param CI         Call supplying types, arguments and fast-math flags.
/// \param Factor     Stored as the vectorization factor (VF) of the query.
/// \param ScalarCost Stored as the scalarization cost of the query.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
                                                 unsigned Factor,
                                                 unsigned ScalarCost)
    : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
  // Fast-math flags are only picked up when the call is an FPMathOperator.
  if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
    FMF = FPMO->getFastMathFlags();

  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());

  // Take the signature from the call site itself. getCalledFunction() returns
  // null for an indirect call, so dereferencing it unchecked would crash.
  FunctionType *FTy = CI.getFunctionType();
  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}

/// Build type-only cost attributes: intrinsic \p Id returning \p RTy with
/// parameter types \p Tys and fast-math flags \p Flags. No argument values
/// are recorded.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys,
                                                 FastMathFlags Flags)
    : RetTy(RTy), IID(Id), FMF(Flags) {
  // Copy the caller-provided parameter types.
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

/// Build type-only cost attributes: intrinsic \p Id returning \p RTy with
/// parameter types \p Tys, fast-math flags \p Flags and an explicit
/// scalarization cost \p ScalarCost. No argument values are recorded.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys,
                                                 FastMathFlags Flags,
                                                 unsigned ScalarCost)
    : RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
  // Copy the caller-provided parameter types.
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

/// Build type-only cost attributes that also remember the originating
/// intrinsic instruction \p I: intrinsic \p Id returning \p RTy with
/// parameter types \p Tys, fast-math flags \p Flags and scalarization cost
/// \p ScalarCost. No argument values are recorded.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys,
                                                 FastMathFlags Flags,
                                                 unsigned ScalarCost,
                                                 const IntrinsicInst *I)
    : II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
  // Copy the caller-provided parameter types.
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

/// Build minimal type-only cost attributes: intrinsic \p Id returning \p RTy
/// with parameter types \p Tys. Fast-math flags and the remaining members
/// keep whatever defaults the class declares.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                                                 ArrayRef<Type *> Tys)
    : RetTy(RTy), IID(Id) {
  // Copy the caller-provided parameter types.
  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

/// Build cost attributes from argument values: intrinsic \p Id returning
/// \p Ty, called with \p Args. The parameter types are derived from the
/// types of the argument values, in order.
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
                                                 ArrayRef<Value *> Args)
    : RetTy(Ty), IID(Id) {
  Arguments.insert(Arguments.begin(), Args.begin(), Args.end());

  // One parameter type per stored argument.
  ParamTys.reserve(Arguments.size());
  for (auto *Arg : Arguments)
    ParamTys.push_back(Arg->getType());
}

bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
LoopInfo &LI, DominatorTree &DT,
bool ForceNestedLoop,
Expand Down Expand Up @@ -702,26 +779,10 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
return Cost;
}

/// Cost of intrinsic \p ID described by its return type \p RetTy and
/// parameter types \p Tys. All arguments are forwarded unchanged to the
/// target implementation; the result is asserted to be non-negative.
int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed, TTI::TargetCostKind CostKind,
    const Instruction *I) const {
  const int TargetCost = TTIImpl->getIntrinsicInstrCost(
      ID, RetTy, Tys, FMF, ScalarizationCostPassed, CostKind, I);
  assert(TargetCost >= 0 && "TTI should not produce negative costs!");
  return TargetCost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args,
FastMathFlags FMF, unsigned VF,
TTI::TargetCostKind CostKind,
const Instruction *I) const {
int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF,
CostKind, I);
/// Cost of the intrinsic described by \p ICA for cost kind \p CostKind.
/// The query is forwarded to the target implementation and the result is
/// asserted to be non-negative.
int TargetTransformInfo::getIntrinsicInstrCost(
    const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const {
  const int TargetCost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
  assert(TargetCost >= 0 && "TTI should not produce negative costs!");
  return TargetCost;
}
Expand Down Expand Up @@ -1361,14 +1422,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
SmallVector<Value *, 4> Args(II->arg_operands());

FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();

return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
FMF, 1, CostKind, II);
IntrinsicCostAttributes CostAttrs(*II);
return getIntrinsicInstrCost(CostAttrs, CostKind);
}
return -1;
default:
Expand Down
42 changes: 9 additions & 33 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,18 +558,15 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
}
}

template <typename T>
int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<T *> Args, FastMathFlags FMF,
unsigned VF,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (!intrinsicHasPackedVectorBenefit(ID))
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
return BaseT::getIntrinsicInstrCost(ICA, CostKind);

Type *RetTy = ICA.getReturnType();
EVT OrigTy = TLI->getValueType(DL, RetTy);
if (!OrigTy.isSimple()) {
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

// Legalize the type.
Expand All @@ -588,31 +585,14 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,

// TODO: Get more refined intrinsic costs?
unsigned InstRate = getQuarterRateInstrCost();
if (ID == Intrinsic::fma) {
if (ICA.getID() == Intrinsic::fma) {
InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost()
: getQuarterRateInstrCost();
}

return LT.first * NElts * InstRate;
}

/// Value-argument overload: forwards to the templated implementation
/// instantiated for Value operands.
int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                      ArrayRef<Value *> Args,
                                      FastMathFlags FMF, unsigned VF,
                                      TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
  const int Cost =
      getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, CostKind, I);
  return Cost;
}

/// Type-list overload: forwards to the templated implementation
/// instantiated for Type operands, passing the scalarization cost in the
/// position the template names VF.
int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                      ArrayRef<Type *> Tys, FastMathFlags FMF,
                                      unsigned ScalarizationCostPassed,
                                      TTI::TargetCostKind CostKind,
                                      const Instruction *I) {
  const int Cost = getIntrinsicInstrCost<Type>(
      ID, RetTy, Tys, FMF, ScalarizationCostPassed, CostKind, I);
  return Cost;
}

unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind) {
// XXX - For some reason this isn't called for switch.
Expand Down Expand Up @@ -981,12 +961,8 @@ GCNTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
SmallVector<Value *, 4> Args(II->arg_operands());
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
FMF, 1, CostKind, II);
IntrinsicCostAttributes CostAttrs(*II);
return getIntrinsicInstrCost(CostAttrs, CostKind);
} else {
return BaseT::getUserCost(U, Operands, CostKind);
}
Expand Down
16 changes: 2 additions & 14 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,20 +232,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
bool IsPairwise,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

template <typename T>
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
FastMathFlags FMF, unsigned VF,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
int getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
Expand Down
24 changes: 6 additions & 18 deletions llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,26 +131,14 @@ unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
}

/// Value-argument overload: no Hexagon-specific handling, the query is
/// passed straight through to the base implementation.
unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                               ArrayRef<Value *> Args,
                                               FastMathFlags FMF, unsigned VF,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  const unsigned BaseCost =
      BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
  return BaseCost;
}

unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (ID == Intrinsic::bswap) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
unsigned
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
if (ICA.getID() == Intrinsic::bswap) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
return LT.first + 2;
}
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed, CostKind, I);
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
Expand Down
12 changes: 2 additions & 10 deletions llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
unsigned VF);
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys,
TTI::TargetCostKind CostKind);
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF,
TTI::TargetCostKind CostKind,
const Instruction *I);
unsigned getIntrinsicInstrCost(
Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
Expand Down
21 changes: 3 additions & 18 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -953,24 +953,9 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}

/// Value-argument overload: no PowerPC-specific handling, the query is
/// passed straight through to the base implementation.
unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                           ArrayRef<Value *> Args,
                                           FastMathFlags FMF, unsigned VF,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
  const unsigned BaseCost =
      BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
  return BaseCost;
}

unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (ID == Intrinsic::bswap && ST->hasP9Vector())
return TLI->getTypeLegalizationCost(DL, RetTy).first;
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed, CostKind, I);
unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
Expand Down
13 changes: 2 additions & 11 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
bool UseMaskForCond = false,
bool UseMaskForGaps = false);
unsigned getIntrinsicInstrCost(
Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);

/// @}
};
Expand Down
24 changes: 4 additions & 20 deletions llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,26 +1151,10 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
return -1;
}

int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args,
FastMathFlags FMF, unsigned VF,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
if (Cost != -1)
return Cost;
return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
}

int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
if (Cost != -1)
return Cost;
return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
ScalarizationCostPassed, CostKind, I);
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
12 changes: 2 additions & 10 deletions llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
bool UseMaskForCond = false,
bool UseMaskForGaps = false);

int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
FastMathFlags FMF,
unsigned ScalarizationCostPassed = UINT_MAX,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr);
int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
/// @}
};

Expand Down
28 changes: 15 additions & 13 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2231,11 +2231,9 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,

unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }

int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) {

// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
// BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
Expand Down Expand Up @@ -2549,7 +2547,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::UADDO, MVT::i8, 1 },
};

Type *RetTy = ICA.getReturnType();
Type *OpTy = RetTy;
Intrinsic::ID IID = ICA.getID();
unsigned ISD = ISD::DELETED_NODE;
switch (IID) {
default:
Expand Down Expand Up @@ -2694,15 +2694,14 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
return LT.first * Entry->Cost;
}

return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
ScalarizationCostPassed, CostKind, I);
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

static const CostTblEntry AVX512CostTbl[] = {
{ ISD::ROTL, MVT::v8i64, 1 },
{ ISD::ROTL, MVT::v4i64, 1 },
Expand Down Expand Up @@ -2753,6 +2752,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSHL, MVT::i8, 4 }
};

Intrinsic::ID IID = ICA.getID();
Type *RetTy = ICA.getReturnType();
const SmallVectorImpl<Value *> &Args = ICA.getArgs();
unsigned ISD = ISD::DELETED_NODE;
switch (IID) {
default:
Expand Down Expand Up @@ -2792,7 +2794,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
return LT.first * Entry->Cost;
}

return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, CostKind, I);
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
Expand Down
15 changes: 4 additions & 11 deletions llvm/lib/Target/X86/X86TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,10 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {

unsigned getAtomicMemIntrinsicMaxElementSize() const;

int getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys,
FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);

int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr);
int getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);

int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm,
Expand Down
11 changes: 3 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3322,14 +3322,9 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");

FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();

SmallVector<Value *, 4> Operands(CI->arg_operands());
return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF,
TargetTransformInfo::TCK_RecipThroughput,
CI);
IntrinsicCostAttributes CostAttrs(ID, *CI, VF);
return TTI.getIntrinsicInstrCost(CostAttrs,
TargetTransformInfo::TCK_RecipThroughput);
}

static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
Expand Down
22 changes: 5 additions & 17 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3247,13 +3247,9 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

// Calculate the cost of the scalar and vector calls.
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();

SmallVector<Value *, 4> Args(CI->arg_operands());
int IntrinsicCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
VecTy->getNumElements());
IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());
int IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);

auto Shape =
VFShape::get(*CI, {static_cast<unsigned>(VecTy->getNumElements()), false},
Expand Down Expand Up @@ -3584,16 +3580,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

// Calculate the cost of the scalar and vector calls.
SmallVector<Type *, 4> ScalarTys;
for (unsigned op = 0, opc = CI->getNumArgOperands(); op != opc; ++op)
ScalarTys.push_back(CI->getArgOperand(op)->getType());

FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();

int ScalarEltCost =
TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF, 1, CostKind);
IntrinsicCostAttributes CostAttrs(ID, *CI, 1, 1);
int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
Expand Down