77 changes: 38 additions & 39 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
int AArch64TTIImpl::getIntImmCost(int64_t Val) {
// Check if the immediate can be encoded within an instruction.
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
return 0;
@@ -37,7 +37,7 @@ unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
}

/// \brief Calculate the cost of materializing the given constant.
unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -51,18 +51,18 @@ unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {

// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
unsigned Cost = 0;
int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
int64_t Val = Tmp.getSExtValue();
Cost += getIntImmCost(Val);
}
// We need at least one instruction to materialize the constant.
return std::max(1U, Cost);
return std::max(1, Cost);
}
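
Note: the loop above prices a wide constant as the sum of its 64-bit chunks' costs, clamped to at least one instruction; with Cost now signed, the clamp is std::max(1, Cost) rather than std::max(1U, Cost). A minimal standalone sketch of the same arithmetic, using a hypothetical per-chunk model in place of AArch64_AM::isLogicalImmediate:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Hypothetical per-chunk model: zero chunks are free, anything else
    // costs one instruction (the real check is AArch64_AM::isLogicalImmediate).
    int chunkCost(int64_t Val) { return Val == 0 ? 0 : 1; }

    // Mirrors getIntImmCost(const APInt &, Type *): sum the per-chunk costs,
    // then require at least one instruction to materialize the constant.
    int intImmCost(const std::vector<int64_t> &Chunks) {
      int Cost = 0;
      for (int64_t C : Chunks)
        Cost += chunkCost(C);
      return std::max(1, Cost); // signed overload now that Cost is int
    }

For example, intImmCost({0, 0}) is 1: even an all-zero 128-bit value needs one instruction.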

unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -118,17 +118,17 @@ unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
}

if (Idx == ImmIdx) {
unsigned NumConstants = (BitSize + 63) / 64;
unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
int NumConstants = (BitSize + 63) / 64;
int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<unsigned>(TTI::TCC_Free)
? static_cast<int>(TTI::TCC_Free)
: Cost;
}
return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
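
Note: the ImmIdx branch reports an immediate as free when materializing it costs no more than one basic instruction per 64-bit chunk it spans, since hoisting the constant could not do better. A sketch of that decision in isolation, assuming the conventional values TCC_Free == 0 and TCC_Basic == 1:

    // Free if materialization fits within one basic instruction per 64-bit
    // chunk; otherwise report the real materialization cost.
    int immCostAtUse(int Cost, unsigned BitSize) {
      const int TCC_Free = 0, TCC_Basic = 1; // assumed conventional values
      int NumConstants = (BitSize + 63) / 64;
      return Cost <= NumConstants * TCC_Basic ? TCC_Free : Cost;
    }

So immCostAtUse(1, 64) is 0 (free) while immCostAtUse(3, 64) is 3; the static_cast in the ternary now targets int so both arms agree in type.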

unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -147,10 +147,10 @@ unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
unsigned NumConstants = (BitSize + 63) / 64;
unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
int NumConstants = (BitSize + 63) / 64;
int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<unsigned>(TTI::TCC_Free)
? static_cast<int>(TTI::TCC_Free)
: Cost;
}
break;
@@ -176,8 +176,7 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}

unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src) {
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");

@@ -259,13 +258,13 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) {
int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");

if (Index != -1U) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -284,12 +283,12 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return 2;
}
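
Note: getTypeLegalizationCost now returns std::pair<int, MVT>, where the first member is a split factor that later scales per-operation costs. A reduced sketch of how this function consumes the result (the split factor is irrelevant here, so it is omitted):

    // Reduced model of a legalization result: whether the legal type is
    // still a vector (the real pair also carries an int split factor).
    struct LegalizedType { bool IsVector; };

    // Mirrors getVectorInstrCost: an extract/insert on a type legalized to
    // a scalar is free; every other case is assumed to cost 2.
    int vectorInstrCost(const LegalizedType &LT, unsigned Index) {
      if (Index == ~0U) // unknown lane: fall through to the default
        return 2;
      if (!LT.IsVector) // legalized away to a scalar type
        return 0;
      return 2;         // all other cases
    }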

unsigned AArch64TTIImpl::getArithmeticInstrCost(
int AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

int ISD = TLI->InstructionOpcodeToISD(Opcode);

@@ -300,10 +299,9 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
// The OperandValue properties may not be the same as those of the previous
// operation; conservatively assume OP_None.
unsigned Cost =
getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -331,7 +329,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
}
}
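
Note: the power-of-two SDIV path prices the expansion as the sum of its component operations, with the operand properties conservatively reset to OP_None. A sketch of the accumulation pattern:

    #include <numeric>
    #include <vector>

    // The cost of an expanded operation is the sum of its components'
    // costs; the component costs here are assumed unit placeholders.
    int expansionCost(const std::vector<int> &ComponentCosts) {
      return std::accumulate(ComponentCosts.begin(), ComponentCosts.end(), 0);
    }

With unit costs, expansionCost({1, 1, 1, 1}) == 4 models the ADD + CMP + SELECT + SRA sequence named in the comment above.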

unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -346,14 +344,14 @@ unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}

unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {

int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower vector selects wider than the register width well.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
const unsigned AmortizationCost = 20;
const int AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
@@ -377,10 +375,9 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
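
Note: wide vector selects are charged per element through the conversion table, each entry being a multiple of the amortization constant. A sketch of the effective pricing under the table shown above:

    // Selects wider than the register width scalarize badly; charging
    // NumElements * AmortizationCost means vectorization only pays off
    // when enough surrounding work is vectorized to hide it.
    int wideVectorSelectCost(int NumElements) {
      const int AmortizationCost = 20; // instructions needed to hide it
      return NumElements * AmortizationCost;
    }

A v16i1/v16i16 select is thus charged wideVectorSelectCost(16) == 320.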

unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
@@ -389,7 +386,7 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// practice on inlined memcpy code.
// We make v2i64 stores expensive so that we will only vectorize if there
// are 6 other instructions getting vectorized.
unsigned AmortizationCost = 6;
int AmortizationCost = 6;

return LT.first * 2 * AmortizationCost;
}
@@ -407,9 +404,11 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
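
Note: the unaligned v2i64 store penalty multiplies the legalization split factor by 2 * AmortizationCost, so such a store only looks profitable when several other instructions vectorize along with it. A sketch:

    // Deliberately over-price unaligned v2i64 stores so the vectorizer
    // needs roughly six other vectorized instructions to amortize one.
    int unalignedV2i64StoreCost(int SplitFactor) {
      const int AmortizationCost = 6;
      return SplitFactor * 2 * AmortizationCost;
    }

A single legal store (SplitFactor == 1) is charged 12 rather than 1.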

unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
unsigned Alignment, unsigned AddressSpace) {
int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");

@@ -427,8 +426,8 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
Alignment, AddressSpace);
}

unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
unsigned Cost = 0;
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
int Cost = 0;
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
35 changes: 16 additions & 19 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -63,12 +63,11 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
/// @{

using BaseT::getIntImmCost;
unsigned getIntImmCost(int64_t Val);
unsigned getIntImmCost(const APInt &Imm, Type *Ty);
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty);
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
int getIntImmCost(int64_t Val);
int getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

/// @}
@@ -96,25 +95,25 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {

unsigned getMaxInterleaveFactor(unsigned VF);

unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);

unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

unsigned getArithmeticInstrCost(
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);

unsigned getAddressComputationCost(Type *Ty, bool IsComplex);
int getAddressComputationCost(Type *Ty, bool IsComplex);

unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);

unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);

unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);

@@ -123,11 +122,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
/// @}
};

52 changes: 25 additions & 27 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,7 +15,7 @@ using namespace llvm;

#define DEBUG_TYPE "armtti"

unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -47,7 +47,7 @@ unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 3;
}

unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");

@@ -61,7 +61,7 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {

if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
if (Idx != -1)
return LT.first * NEONFltDblTbl[Idx].Cost;
@@ -245,8 +245,8 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
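
Note: this generation of CostTableLookup returns an index (-1 on a miss) rather than a pointer, and a hit is scaled by the legalization split factor. A standalone sketch of the pattern:

    #include <cstddef>

    struct CostTblEntry { int ISD; int SimpleTy; int Cost; };

    // Linear scan mirroring CostTableLookup: index of the match, or -1.
    int costTableLookup(const CostTblEntry *Tbl, size_t Len, int ISD,
                        int SimpleTy) {
      for (size_t I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].SimpleTy == SimpleTy)
          return static_cast<int>(I);
      return -1;
    }

On a hit the caller returns LT.first * Tbl[Idx].Cost, as in the FP round/extend case above.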

unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on swift.
if (ST->isSwift() &&
@@ -265,8 +265,7 @@ unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}

unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {

int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON, a vector select gets lowered to vbsl.
@@ -292,14 +291,14 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return NEONVectorSelectTbl[Idx].Cost;
}

std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}

return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -314,7 +313,7 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}

unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
int ARMTTIImpl::getFPOpCost(Type *Ty) {
// Use similar logic that's in ARMISelLowering:
// Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
// to VFP.
@@ -333,8 +332,8 @@ unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
return TargetTransformInfo::TCC_Expensive;
}

unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -353,7 +352,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
@@ -379,7 +378,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,

{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
int Idx =
CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
@@ -389,13 +388,13 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
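
Note: only reverse and alternate shuffles are priced against the NEON tables; every other kind, and any table miss, defers to the base implementation. A sketch of the dispatch:

    enum class ShuffleKind { Reverse, Alternate, Other };

    // Reverse/alternate shuffles consult the NEON tables; otherwise, or on
    // a miss (TblIdx == -1), fall back to the generic BaseT cost.
    int shuffleCost(ShuffleKind Kind, int SplitFactor, int TblIdx,
                    int TblCost, int BaseCost) {
      if (Kind != ShuffleKind::Reverse && Kind != ShuffleKind::Alternate)
        return BaseCost;
      return TblIdx != -1 ? SplitFactor * TblCost : BaseCost;
    }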

unsigned ARMTTIImpl::getArithmeticInstrCost(
int ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {

int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
@@ -448,8 +447,8 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;

unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
Opd1PropInfo, Opd2PropInfo);
int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
Opd1PropInfo, Opd2PropInfo);

// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
@@ -465,10 +464,9 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
return Cost;
}
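
Note: divisions the target cannot lower directly are priced with the two constants above: a runtime library call versus a cheaper reciprocal sequence. A sketch of the choice, under those constants:

    // Unsupported division either calls a runtime routine or, when
    // permitted, uses a reciprocal sequence at half the price.
    int divisionCost(bool HasHWDiv, bool UseReciprocal) {
      const int FunctionCallDivCost = 20;
      const int ReciprocalDivCost = 10;
      if (HasHWDiv)
        return 1; // assumed single-instruction hardware divide
      return UseReciprocal ? ReciprocalDivCost : FunctionCallDivCost;
    }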

unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

if (Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
@@ -479,11 +477,11 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}

unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace) {
int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");

29 changes: 13 additions & 16 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -56,7 +56,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
/// @{

using BaseT::getIntImmCost;
unsigned getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(const APInt &Imm, Type *Ty);

/// @}

@@ -92,34 +92,31 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
return 1;
}

unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);

unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);

unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);

unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

unsigned getAddressComputationCost(Type *Val, bool IsComplex);
int getAddressComputationCost(Type *Val, bool IsComplex);

unsigned getFPOpCost(Type *Ty);
int getFPOpCost(Type *Ty);

unsigned getArithmeticInstrCost(
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);

unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);

unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
/// @}
};

4 changes: 2 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
return false;
}

unsigned NVPTXTTIImpl::getArithmeticInstrCost(
int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

int ISD = TLI->InstructionOpcodeToISD(Opcode);

2 changes: 1 addition & 1 deletion llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -52,7 +52,7 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {

bool isSourceOfDivergence(const Value *V);

unsigned getArithmeticInstrCost(
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
33 changes: 15 additions & 18 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -35,7 +35,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}

unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(Imm, Ty);

@@ -64,8 +64,8 @@ unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}

unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(IID, Idx, Imm, Ty);

@@ -98,8 +98,8 @@ unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}

unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);

@@ -246,7 +246,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}

unsigned PPCTTIImpl::getArithmeticInstrCost(
int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
@@ -257,24 +257,22 @@ unsigned PPCTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo);
}

unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) {
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");

int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -313,15 +311,14 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}

unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");

unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

// VSX loads/stores support unaligned access.
if (ST->hasVSX()) {
24 changes: 11 additions & 13 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -52,12 +52,11 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
/// @{

using BaseT::getIntImmCost;
unsigned getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(const APInt &Imm, Type *Ty);

unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty);
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -71,19 +70,18 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
unsigned getArithmeticInstrCost(
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);

/// @}
};
10 changes: 5 additions & 5 deletions llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//

unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,8 +63,8 @@ unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}

unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -181,8 +181,8 @@ unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}

unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
9 changes: 4 additions & 5 deletions llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -42,12 +42,11 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
/// \name Scalar TTI Implementations
/// @{

unsigned getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(const APInt &Imm, Type *Ty);

unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty);
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

120 changes: 56 additions & 64 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -84,12 +84,12 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}

unsigned X86TTIImpl::getArithmeticInstrCost(
int X86TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -101,10 +101,9 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence SRA + SRL + ADD + SRA.
// The OperandValue properties may not be the same as those of the previous
// operation; conservatively assume OP_None.
unsigned Cost =
2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
int Cost = 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -349,15 +348,15 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
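
Note: on X86 a signed divide by a power of two is expanded to roughly SRA + SRL + ADD + SRA, so the model charges two arithmetic-shift costs plus one logical shift plus one add. A sketch of the sum:

    // Cost(sdiv by pow2) ~= 2 * Cost(ashr) + Cost(lshr) + Cost(add);
    // operand properties are conservatively reset to OP_None throughout.
    int sdivByPow2Cost(int AShrCost, int LShrCost, int AddCost) {
      int Cost = 2 * AShrCost; // previously accumulated in an unsigned
      Cost += LShrCost;
      Cost += AddCost;
      return Cost;
    }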

unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only estimate the cost of reverse and alternate shuffles.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

if (Kind == TTI::SK_Reverse) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
unsigned Cost = 1;
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
int Cost = 1;
if (LT.second.getSizeInBits() > 128)
Cost = 3; // Extract + insert + copy.

@@ -368,7 +367,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
@@ -464,7 +463,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");

@@ -628,8 +627,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
};

std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);

if (ST->hasSSE2() && !ST->hasAVX()) {
int Idx =
@@ -669,10 +668,9 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

MVT MTy = LT.second;

@@ -739,13 +737,12 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) {
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");

if (Index != -1U) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -763,10 +760,9 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}

unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
bool Extract) {
int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
assert (Ty->isVectorTy() && "Can only scalarize vectors");
unsigned Cost = 0;
int Cost = 0;

for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
if (Insert)
@@ -778,9 +774,8 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
return Cost;
}
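
Note: scalarization overhead is the per-element insert and/or extract cost summed over the whole vector. A worked sketch assuming unit cost per element operation:

    // Sum insertelement/extractelement costs across all vector elements.
    int scalarizationOverhead(int NumElts, bool Insert, bool Extract) {
      int Cost = 0;
      for (int I = 0; I != NumElts; ++I) {
        if (Insert)
          Cost += 1; // assumed unit insertelement cost
        if (Extract)
          Cost += 1; // assumed unit extractelement cost
      }
      return Cost;
    }

A <4 x float> needing both directions costs scalarizationOverhead(4, true, true) == 8.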

unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) {
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@@ -798,22 +793,21 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

// Assume that all other non-power-of-two numbers are scalarized.
if (!isPowerOf2_32(NumElem)) {
unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(),
Alignment, AddressSpace);
unsigned SplitCost = getScalarizationOverhead(Src,
Opcode == Instruction::Load,
Opcode==Instruction::Store);
int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
AddressSpace);
int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load,
Opcode == Instruction::Store);
return NumElem * Cost + SplitCost;
}
}

// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");

// Each load/store unit costs 1.
unsigned Cost = LT.first * 1;
int Cost = LT.first * 1;

// On Sandybridge 256bit load/stores are double pumped
// (but not on Haswell).
@@ -823,9 +817,9 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return Cost;
}

unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned Alignment,
unsigned AddressSpace) {
int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned Alignment,
unsigned AddressSpace) {
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// For a scalar, take the regular cost without the mask.
@@ -838,25 +832,23 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
(Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
!isPowerOf2_32(NumElem)) {
// Scalarization
unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
unsigned ScalarCompareCost =
getCmpSelInstrCost(Instruction::ICmp,
Type::getInt8Ty(getGlobalContext()), NULL);
unsigned BranchCost = getCFInstrCost(Instruction::Br);
unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);

unsigned ValueSplitCost =
getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
Opcode == Instruction::Store);
unsigned MemopCost =
int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
int ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt8Ty(getGlobalContext()), NULL);
int BranchCost = getCFInstrCost(Instruction::Br);
int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);

int ValueSplitCost = getScalarizationOverhead(
SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}

// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
unsigned Cost = 0;
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
int Cost = 0;
if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
@@ -876,7 +868,7 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
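
Note: the scalarized masked-op total combines a per-element branch plus compare on the mask, the mask and value scalarization overheads, and one scalar memory operation per element. A sketch of the formula with the sub-costs passed in:

    // Scalarized masked load/store:
    //   NumElem * (branch + compare)     -- test each mask bit
    // + mask and value scalarization     -- split the vectors apart
    // + NumElem * scalar memory op cost  -- the actual accesses
    int scalarizedMaskedOpCost(int NumElem, int BranchCost, int CmpCost,
                               int MaskSplitCost, int ValueSplitCost,
                               int MemOpCost) {
      int MaskCmpCost = NumElem * (BranchCost + CmpCost);
      return NumElem * MemOpCost + ValueSplitCost + MaskSplitCost +
             MaskCmpCost;
    }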

unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -889,10 +881,10 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return BaseT::getAddressComputationCost(Ty, IsComplex);
}

unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) {
int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) {

std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

MVT MTy = LT.second;

@@ -972,7 +964,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
int X86TTIImpl::getIntImmCost(int64_t Val) {
if (Val == 0)
return TTI::TCC_Free;

@@ -982,7 +974,7 @@ unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
return 2 * TTI::TCC_Basic;
}

unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1006,18 +998,18 @@ unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {

// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
unsigned Cost = 0;
int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
int64_t Val = Tmp.getSExtValue();
Cost += getIntImmCost(Val);
}
// We need at least one instruction to materialize the constant.
return std::max(1U, Cost);
return std::max(1, Cost);
}

unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1075,18 +1067,18 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
}

if (Idx == ImmIdx) {
unsigned NumConstants = (BitSize + 63) / 64;
unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
int NumConstants = (BitSize + 63) / 64;
int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<unsigned>(TTI::TCC_Free)
? static_cast<int>(TTI::TCC_Free)
: Cost;
}

return X86TTIImpl::getIntImmCost(Imm, Ty);
}

unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) {
assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();
36 changes: 17 additions & 19 deletions llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -33,7 +33,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const X86Subtarget *ST;
const X86TargetLowering *TLI;

unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);

const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
@@ -62,34 +62,32 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
unsigned getArithmeticInstrCost(
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);

unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex);
int getAddressComputationCost(Type *PtrTy, bool IsComplex);

unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);

unsigned getIntImmCost(int64_t);
int getIntImmCost(int64_t);

unsigned getIntImmCost(const APInt &Imm, Type *Ty);
int getIntImmCost(const APInt &Imm, Type *Ty);

unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Type *Ty);
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty);
bool isLegalMaskedLoad(Type *DataType, int Consecutive);
bool isLegalMaskedStore(Type *DataType, int Consecutive);
bool areInlineCompatible(const Function *Caller,