Skip to content

Commit

Permalink
[CostModel] Align the cost model for intrinsics for scalable/fixed-wi…
Browse files Browse the repository at this point in the history
…dth vectors.

Let getIntrinsicInstrCost call getTypeBasedIntrinsicInstrCost for scalable vectors,
similar to how this is done for fixed-width vectors, instead of falling back
on BaseT::getIntrinsicInstrCost().

If the intrinsic cannot be costed (or is not overloaded by the target),
it will return InstructionCost::getInvalid() instead.

Depends on D97469

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D97470
  • Loading branch information
sdesmalen-arm committed Mar 31, 2021
1 parent dc7ebd2 commit b6d0529
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 9 deletions.
17 changes: 13 additions & 4 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1346,15 +1346,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}
}
// TODO: Handle the remaining intrinsic with scalable vector type
if (isa<ScalableVectorType>(RetTy))
return BaseT::getIntrinsicInstrCost(ICA, CostKind);

// Assume that we need to scalarize this intrinsic.
// Compute the scalarization overhead based on Args for a vector
// intrinsic.
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
if (RetVF.isVector()) {
if (RetVF.isVector() && !RetVF.isScalable()) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
ScalarizationCost +=
Expand Down Expand Up @@ -1399,6 +1396,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
SmallVector<unsigned, 2> ISDs;
switch (IID) {
default: {
// Scalable vectors cannot be scalarized, so return Invalid.
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
return isa<ScalableVectorType>(Ty);
}))
return InstructionCost::getInvalid();

// Assume that we need to scalarize this intrinsic.
InstructionCost ScalarizationCost = ScalarizationCostPassed;
unsigned ScalarCalls = 1;
Expand Down Expand Up @@ -1810,6 +1813,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
// Scalable vectors cannot be scalarized, so return Invalid.
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
return isa<ScalableVectorType>(Ty);
}))
return InstructionCost::getInvalid();

unsigned ScalarizationCost = SkipScalarizationCost ?
ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)

define void @count_zeroes(<vscale x 4 x i32> %A) {
; CHECK-LABEL: 'count_zeroes'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
%ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
%cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
ret void
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Analysis/CostModel/AArch64/sve-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ define <vscale x 2 x double> @fadd_v2f64(<vscale x 2 x double> %a, <vscale x 2 x

define <vscale x 2 x double> @sqrt_v2f64(<vscale x 2 x double> %a) {
; THRU-LABEL: 'sqrt_v2f64'
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 2 x double> %r
;
; LATE-LABEL: 'sqrt_v2f64'
; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <vscale x 2 x double> %r
;
; SIZE-LABEL: 'sqrt_v2f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <vscale x 2 x double> %r
;
; SIZE_LATE-LABEL: 'sqrt_v2f64'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <vscale x 2 x double> %r
;
%r = call <vscale x 2 x double> @llvm.sqrt.v2f64(<vscale x 2 x double> %a)
Expand Down

0 comments on commit b6d0529

Please sign in to comment.