Skip to content

Commit fdec501

Browse files
committed
[CostModel] Replace getUserCost with getInstructionCost
* Replace getUserCost with getInstructionCost, covering all cost kinds. * Remove getInstructionLatency: it is not implemented by any backend, and its functionality should be folded into getUserCost (now getInstructionCost) so that targets can handle all cost kinds with their existing cost callbacks. Original patch by @samparker (Sam Parker). Differential Revision: https://reviews.llvm.org/D79483
1 parent 27cbfa7 commit fdec501

29 files changed

+263
-302
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 20 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -219,29 +219,6 @@ class TargetTransformInfo {
219219
TCK_SizeAndLatency ///< The weighted sum of size and latency.
220220
};
221221

222-
/// Query the cost of a specified instruction.
223-
///
224-
/// Clients should use this interface to query the cost of an existing
225-
/// instruction. The instruction must have a valid parent (basic block).
226-
///
227-
/// Note, this method does not cache the cost calculation and it
228-
/// can be expensive in some cases.
229-
InstructionCost getInstructionCost(const Instruction *I,
230-
enum TargetCostKind kind) const {
231-
InstructionCost Cost;
232-
switch (kind) {
233-
case TCK_Latency:
234-
Cost = getInstructionLatency(I);
235-
break;
236-
case TCK_RecipThroughput:
237-
case TCK_CodeSize:
238-
case TCK_SizeAndLatency:
239-
Cost = getUserCost(I, kind);
240-
break;
241-
}
242-
return Cost;
243-
}
244-
245222
/// Underlying constants for 'cost' values in this interface.
246223
///
247224
/// Many APIs in this interface return a cost. This enum defines the
@@ -320,14 +297,16 @@ class TargetTransformInfo {
320297
///
321298
/// The returned cost is defined in terms of \c TargetCostConstants, see its
322299
/// comments for a detailed explanation of the cost values.
323-
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
324-
TargetCostKind CostKind) const;
300+
InstructionCost getInstructionCost(const User *U,
301+
ArrayRef<const Value *> Operands,
302+
TargetCostKind CostKind) const;
325303

326-
/// This is a helper function which calls the two-argument getUserCost
327-
/// with \p Operands which are the current operands U has.
328-
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
304+
/// This is a helper function which calls the three-argument
305+
/// getInstructionCost with \p Operands which are the current operands U has.
306+
InstructionCost getInstructionCost(const User *U,
307+
TargetCostKind CostKind) const {
329308
SmallVector<const Value *, 4> Operands(U->operand_values());
330-
return getUserCost(U, Operands, CostKind);
309+
return getInstructionCost(U, Operands, CostKind);
331310
}
332311

333312
/// If a branch or a select condition is skewed in one direction by more than
@@ -432,11 +411,11 @@ class TargetTransformInfo {
432411
/// Parameters that control the generic loop unrolling transformation.
433412
struct UnrollingPreferences {
434413
/// The cost threshold for the unrolled loop. Should be relative to the
435-
/// getUserCost values returned by this API, and the expectation is that
436-
/// the unrolled loop's instructions when run through that interface should
437-
/// not exceed this cost. However, this is only an estimate. Also, specific
438-
/// loops may be unrolled even with a cost above this threshold if deemed
439-
/// profitable. Set this to UINT_MAX to disable the loop body cost
414+
/// getInstructionCost values returned by this API, and the expectation is
415+
/// that the unrolled loop's instructions when run through that interface
416+
/// should not exceed this cost. However, this is only an estimate. Also,
417+
/// specific loops may be unrolled even with a cost above this threshold if
418+
/// deemed profitable. Set this to UINT_MAX to disable the loop body cost
440419
/// restriction.
441420
unsigned Threshold;
442421
/// If complete unrolling will reduce the cost of the loop, we will boost
@@ -1519,10 +1498,6 @@ class TargetTransformInfo {
15191498
/// @}
15201499

15211500
private:
1522-
/// Estimate the latency of specified instruction.
1523-
/// Returns 1 as the default value.
1524-
InstructionCost getInstructionLatency(const Instruction *I) const;
1525-
15261501
/// The abstract base class used to type erase specific TTI
15271502
/// implementations.
15281503
class Concept;
@@ -1549,9 +1524,9 @@ class TargetTransformInfo::Concept {
15491524
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
15501525
ProfileSummaryInfo *PSI,
15511526
BlockFrequencyInfo *BFI) = 0;
1552-
virtual InstructionCost getUserCost(const User *U,
1553-
ArrayRef<const Value *> Operands,
1554-
TargetCostKind CostKind) = 0;
1527+
virtual InstructionCost getInstructionCost(const User *U,
1528+
ArrayRef<const Value *> Operands,
1529+
TargetCostKind CostKind) = 0;
15551530
virtual BranchProbability getPredictableBranchThreshold() = 0;
15561531
virtual bool hasBranchDivergence() = 0;
15571532
virtual bool useGPUDivergenceAnalysis() = 0;
@@ -1866,7 +1841,6 @@ class TargetTransformInfo::Concept {
18661841
virtual bool supportsScalableVectors() const = 0;
18671842
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
18681843
Align Alignment) const = 0;
1869-
virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
18701844
virtual VPLegalization
18711845
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
18721846
};
@@ -1901,9 +1875,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
19011875
InstructionCost getMemcpyCost(const Instruction *I) override {
19021876
return Impl.getMemcpyCost(I);
19031877
}
1904-
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1905-
TargetCostKind CostKind) override {
1906-
return Impl.getUserCost(U, Operands, CostKind);
1878+
InstructionCost getInstructionCost(const User *U,
1879+
ArrayRef<const Value *> Operands,
1880+
TargetCostKind CostKind) override {
1881+
return Impl.getInstructionCost(U, Operands, CostKind);
19071882
}
19081883
BranchProbability getPredictableBranchThreshold() override {
19091884
return Impl.getPredictableBranchThreshold();
@@ -2518,10 +2493,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
25182493
return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
25192494
}
25202495

2521-
InstructionCost getInstructionLatency(const Instruction *I) override {
2522-
return Impl.getInstructionLatency(I);
2523-
}
2524-
25252496
VPLegalization
25262497
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
25272498
return Impl.getVPLegalizationStrategy(PI);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,12 @@ class TargetTransformInfoImplBase {
500500
// FIXME: Unlikely to be true for CodeSize.
501501
return TTI::TCC_Expensive;
502502
}
503+
504+
// Assume a 3cy latency for fp arithmetic ops.
505+
if (CostKind == TTI::TCK_Latency)
506+
if (Ty->getScalarType()->isFloatingPointTy())
507+
return 3;
508+
503509
return 1;
504510
}
505511

@@ -993,8 +999,9 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
993999
return TTI::TCC_Basic;
9941000
}
9951001

996-
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
997-
TTI::TargetCostKind CostKind) {
1002+
InstructionCost getInstructionCost(const User *U,
1003+
ArrayRef<const Value *> Operands,
1004+
TTI::TargetCostKind CostKind) {
9981005
using namespace llvm::PatternMatch;
9991006

10001007
auto *TargetTTI = static_cast<T *>(this);
@@ -1097,6 +1104,9 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
10971104
CostKind, I);
10981105
}
10991106
case Instruction::Load: {
1107+
// FIXME: Arbitrary cost which could come from the backend.
1108+
if (CostKind == TTI::TCK_Latency)
1109+
return 4;
11001110
auto *LI = cast<LoadInst>(U);
11011111
Type *LoadType = U->getType();
11021112
// If there is a non-register sized type, the cost estimation may expand
@@ -1248,39 +1258,10 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
12481258
return TargetTTI->getVectorInstrCost(*EEI, DstTy, Idx);
12491259
}
12501260
}
1251-
// By default, just classify everything as 'basic'.
1252-
return TTI::TCC_Basic;
1253-
}
12541261

1255-
InstructionCost getInstructionLatency(const Instruction *I) {
1256-
SmallVector<const Value *, 4> Operands(I->operand_values());
1257-
if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
1258-
return 0;
1259-
1260-
if (isa<LoadInst>(I))
1261-
return 4;
1262-
1263-
Type *DstTy = I->getType();
1264-
1265-
// Usually an intrinsic is a simple instruction.
1266-
// A real function call is much slower.
1267-
if (auto *CI = dyn_cast<CallInst>(I)) {
1268-
const Function *F = CI->getCalledFunction();
1269-
if (!F || static_cast<T *>(this)->isLoweredToCall(F))
1270-
return 40;
1271-
// Some intrinsics return a value and a flag, we use the value type
1272-
// to decide its latency.
1273-
if (StructType *StructTy = dyn_cast<StructType>(DstTy))
1274-
DstTy = StructTy->getElementType(0);
1275-
// Fall through to simple instructions.
1276-
}
1277-
1278-
if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
1279-
DstTy = VectorTy->getElementType();
1280-
if (DstTy->isFloatingPointTy())
1281-
return 3;
1282-
1283-
return 1;
1262+
// By default, just classify everything as 'basic' or -1 to represent that
1263+
// we don't know the throughput cost.
1264+
return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
12841265
}
12851266
};
12861267
} // namespace llvm

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -638,13 +638,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
638638
SimplifyAndSetOp);
639639
}
640640

641-
InstructionCost getInstructionLatency(const Instruction *I) {
642-
if (isa<LoadInst>(I))
643-
return getST()->getSchedModel().DefaultLoadLatency;
644-
645-
return BaseT::getInstructionLatency(I);
646-
}
647-
648641
virtual Optional<unsigned>
649642
getCacheSize(TargetTransformInfo::CacheLevel Level) const {
650643
return Optional<unsigned>(

llvm/lib/Analysis/CodeMetrics.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ void CodeMetrics::analyzeBasicBlock(
177177
if (InvI->cannotDuplicate())
178178
notDuplicatable = true;
179179

180-
NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
180+
NumInsts += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
181181
}
182182

183183
if (isa<ReturnInst>(BB->getTerminator()))

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,8 +1361,8 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
13611361
Operands.push_back(SimpleOp);
13621362
else
13631363
Operands.push_back(Op);
1364-
return TTI.getUserCost(&GEP, Operands,
1365-
TargetTransformInfo::TCK_SizeAndLatency) ==
1364+
return TTI.getInstructionCost(&GEP, Operands,
1365+
TargetTransformInfo::TCK_SizeAndLatency) ==
13661366
TargetTransformInfo::TCC_Free;
13671367
}
13681368

@@ -1639,7 +1639,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
16391639
if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
16401640
SROAArgValues[&I] = SROAArg;
16411641

1642-
return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1642+
return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
16431643
TargetTransformInfo::TCC_Free;
16441644
}
16451645

@@ -1662,7 +1662,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
16621662
if (auto *SROAArg = getSROAArgForValueOrNull(Op))
16631663
SROAArgValues[&I] = SROAArg;
16641664

1665-
return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1665+
return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
16661666
TargetTransformInfo::TCC_Free;
16671667
}
16681668

@@ -1692,7 +1692,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
16921692
break;
16931693
}
16941694

1695-
return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
1695+
return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
16961696
TargetTransformInfo::TCC_Free;
16971697
}
16981698

@@ -2390,7 +2390,7 @@ bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
23902390
bool CallAnalyzer::visitInstruction(Instruction &I) {
23912391
// Some instructions are free. All of the free intrinsics can also be
23922392
// handled by SROA, etc.
2393-
if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
2393+
if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
23942394
TargetTransformInfo::TCC_Free)
23952395
return true;
23962396

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,10 @@ unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
221221
}
222222

223223
InstructionCost
224-
TargetTransformInfo::getUserCost(const User *U,
225-
ArrayRef<const Value *> Operands,
226-
enum TargetCostKind CostKind) const {
227-
InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind);
224+
TargetTransformInfo::getInstructionCost(const User *U,
225+
ArrayRef<const Value *> Operands,
226+
enum TargetCostKind CostKind) const {
227+
InstructionCost Cost = TTIImpl->getInstructionCost(U, Operands, CostKind);
228228
assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) &&
229229
"TTI should not produce negative costs!");
230230
return Cost;
@@ -1149,11 +1149,6 @@ bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
11491149
return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
11501150
}
11511151

1152-
InstructionCost
1153-
TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
1154-
return TTIImpl->getInstructionLatency(I);
1155-
}
1156-
11571152
TargetTransformInfo::Concept::~Concept() = default;
11581153

11591154
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6603,8 +6603,8 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
66036603
// If it's safe to speculatively execute, then it should not have side
66046604
// effects; therefore, it's safe to sink and possibly *not* execute.
66056605
return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
6606-
TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
6607-
TargetTransformInfo::TCC_Expensive;
6606+
TTI->getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
6607+
TargetTransformInfo::TCC_Expensive;
66086608
}
66096609

66106610
/// Returns true if a SelectInst should be turned into an explicit branch.

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2340,8 +2340,8 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
23402340
}
23412341

23422342
SmallVector<const Value*, 4> Operands(I.operand_values());
2343-
Cost +=
2344-
getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
2343+
Cost += getInstructionCost(&I, Operands,
2344+
TargetTransformInfo::TCK_SizeAndLatency);
23452345
}
23462346
}
23472347

llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,10 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
340340
return ST.getL1CacheLineSize();
341341
}
342342

343-
InstructionCost HexagonTTIImpl::getUserCost(const User *U,
344-
ArrayRef<const Value *> Operands,
345-
TTI::TargetCostKind CostKind) {
343+
InstructionCost
344+
HexagonTTIImpl::getInstructionCost(const User *U,
345+
ArrayRef<const Value *> Operands,
346+
TTI::TargetCostKind CostKind) {
346347
auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
347348
if (!CI->isIntegerCast())
348349
return false;
@@ -364,7 +365,7 @@ InstructionCost HexagonTTIImpl::getUserCost(const User *U,
364365
if (const CastInst *CI = dyn_cast<const CastInst>(U))
365366
if (isCastFoldedIntoLoad(CI))
366367
return TargetTransformInfo::TCC_Free;
367-
return BaseT::getUserCost(U, Operands, CostKind);
368+
return BaseT::getInstructionCost(U, Operands, CostKind);
368369
}
369370

370371
bool HexagonTTIImpl::shouldBuildLookupTables() const {

llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,9 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
165165

166166
/// @}
167167

168-
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
169-
TTI::TargetCostKind CostKind);
168+
InstructionCost getInstructionCost(const User *U,
169+
ArrayRef<const Value *> Operands,
170+
TTI::TargetCostKind CostKind);
170171

171172
// Hexagon specific decision to generate a lookup table.
172173
bool shouldBuildLookupTables() const;

0 commit comments

Comments
 (0)