Skip to content

Commit

Permalink
[SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attribute
Browse files Browse the repository at this point in the history

Removed AArch64 usage of the getMaxVScale interface, replacing it with
the vscale_range(min, max) IR Attribute.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D106277
  • Loading branch information
DylanFleming-arm committed Aug 17, 2021
1 parent 9de882f commit ef198cd
Show file tree
Hide file tree
Showing 20 changed files with 98 additions and 56 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,11 @@ class TargetInfo : public virtual TransferrableTargetInfo,
/// across the current set of primary and secondary targets.
virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;

/// Returns the target-specific minimum and maximum vscale values, as a
/// (min, max) pair, for use in the vscale_range function attribute.
///
/// This default implementation reports no range by returning None; a target
/// (e.g. AArch64) overrides it to supply one based on \p LangOpts.
virtual Optional<std::pair<unsigned, unsigned>>
getVScaleRange(const LangOptions &LangOpts) const {
return None;
}
/// The __builtin_clz* and __builtin_ctz* built-in
/// functions are specified to have undefined results for zero inputs, but
/// on targets that support these operations in a way that provides
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,17 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
Builtin::FirstTSBuiltin);
}

// Computes the (min, max) vscale pair reported for AArch64, or None when no
// range applies.
Optional<std::pair<unsigned, unsigned>>
AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
  using VScaleBounds = std::pair<unsigned, unsigned>;

  // A non-zero ArmSveVectorBits pins vscale to a single value: the vector
  // width in bits divided by 128, used as both the minimum and the maximum.
  if (const unsigned FixedBits = LangOpts.ArmSveVectorBits) {
    const unsigned Exact = FixedBits / 128;
    return VScaleBounds(Exact, Exact);
  }

  // No fixed width: without SVE there is nothing to report; with SVE the
  // range is (0, 16).
  if (!hasFeature("sve"))
    return None;
  return VScaleBounds(0, 16);
}

bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" ||
(Feature == "neon" && (FPU & NeonMode)) ||
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Basic/Targets/AArch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {

ArrayRef<Builtin::Info> getTargetBuiltins() const override;

Optional<std::pair<unsigned, unsigned>>
getVScaleRange(const LangOptions &LangOpts) const override;

bool hasFeature(StringRef Feature) const override;
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
Expand Down
12 changes: 7 additions & 5 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,11 +484,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// function.
CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));

// Add vscale attribute if appropriate.
if (getLangOpts().ArmSveVectorBits) {
unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
VScale, VScale));
// Add vscale_range attribute if appropriate.
Optional<std::pair<unsigned, unsigned>> VScaleRange =
getContext().getTargetInfo().getVScaleRange(getLangOpts());
if (VScaleRange) {
CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
getLLVMContext(), VScaleRange.getValue().first,
VScaleRange.getValue().second));
}

// If we generated an unreachable return block, delete it now.
Expand Down
5 changes: 4 additions & 1 deletion clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE

// CHECK-LABEL: @func() #0
// CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
// CHECK-NONE-NOT: vscale_range
// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
void func() {}
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
ElementCount LegalVF = LT.second.getVectorElementCount();
InstructionCost MemOpCost =
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
return LT.first * MemOpCost * getMaxNumElements(LegalVF);
return LT.first * MemOpCost * getMaxNumElements(LegalVF, I->getFunction());
}

bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
Expand Down
21 changes: 12 additions & 9 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,22 +125,25 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return ST->getMinVectorRegisterBitWidth();
}

/// Returns the maximum vscale for the subtarget. With SVE this is
/// AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock; otherwise the
/// answer is delegated to BaseT.
Optional<unsigned> getMaxVScale() const {
  // No SVE: fall back to whatever the base implementation reports.
  if (!ST->hasSVE())
    return BaseT::getMaxVScale();
  return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
}

/// Try to return an estimate cost factor that can be used as a multiplier
/// when scalarizing an operation for a vector with ElementCount \p VF.
/// For scalable vectors this currently takes the most pessimistic view based
/// upon the maximum possible value for vscale.
///
/// NOTE(review): this span shows both the pre-change and post-change lines of
/// the diff (two signatures, two bodies). In the newer lines, when \p F is
/// non-null and carries a vscale_range attribute with a non-zero maximum,
/// that maximum is used in place of the fallback value.
unsigned getMaxNumElements(ElementCount VF) const {
unsigned getMaxNumElements(ElementCount VF,
const Function *F = nullptr) const {
// Fixed-width vectors: the element count is known exactly.
if (!VF.isScalable())
return VF.getFixedValue();
Optional<unsigned> MaxNumVScale = getMaxVScale();
assert(MaxNumVScale && "Expected valid max vscale value");
return *MaxNumVScale * VF.getKnownMinValue();

// Fallback upper bound on vscale, kept when F is null, has no vscale_range
// attribute, or that attribute's maximum is 0.
unsigned MaxNumVScale = 16;
if (F && F->hasFnAttribute(Attribute::VScaleRange)) {
unsigned VScaleMax =
F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
// A maximum of 0 does not override the fallback.
if (VScaleMax > 0)
MaxNumVScale = VScaleMax;
}

return MaxNumVScale * VF.getKnownMinValue();
}

unsigned getMaxInterleaveFactor(unsigned VF);
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5636,6 +5636,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {

// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
.getVScaleRangeArgs()
.second;
if (VScaleMax > 0)
MaxVScale = VScaleMax;
}
MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
; CHECK-LABEL: 'masked_gathers'
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
; CHECK-LABEL: 'masked_scatters'
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
; return a;
; }
;
define i32 @PR33613(double* %b, double %j, i32 %d) {
define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
; CHECK-VF4UF2-LABEL: @PR33613
; CHECK-VF4UF2: vector.body
; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
Expand Down Expand Up @@ -66,7 +66,7 @@ for.body:
; }
;
; Check that the sext sank after the load in the vector loop.
define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
; CHECK-VF4UF1-LABEL: @PR34711
; CHECK-VF4UF1: vector.body
; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
Expand Down Expand Up @@ -100,5 +100,6 @@ for.end:
ret void
}

attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED

define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
Expand Down Expand Up @@ -49,7 +49,7 @@ for.end:
ret float %add
}

define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_unroll
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
Expand Down Expand Up @@ -113,7 +113,7 @@ for.end:
ret float %add
}

define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_interleave
; CHECK-ORDERED: entry
; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
Expand Down Expand Up @@ -206,7 +206,7 @@ for.end:
ret void
}

define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_of_sum
; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
Expand Down Expand Up @@ -268,7 +268,7 @@ for.end: ; preds = %for.body, %entry
ret float %res
}

define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_conditional
; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
Expand Down Expand Up @@ -343,7 +343,7 @@ for.end:
}

; Negative test - loop contains multiple fadds which we cannot safely reorder
define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_multiple
; CHECK-ORDERED-NOT: vector.body

Expand Down Expand Up @@ -390,6 +390,7 @@ for.end: ; preds = %for.body
ret float %rdx
}

attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !3, !6, !8}
!1 = distinct !{!1, !3, !7, !8}
!2 = distinct !{!2, !4, !6, !8}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; Test that the MaxVF for the following loop, that has no dependence distances,
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
; (maximized bandwidth for i8 in the loop).
define void @test0(i32* %a, i8* %b, i32* %c) {
define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
Expand Down Expand Up @@ -40,7 +40,7 @@ exit:

; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4.
define void @test1(i32* %a, i8* %b) {
define void @test1(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
Expand Down Expand Up @@ -74,7 +74,7 @@ exit:

; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2.
define void @test2(i32* %a, i8* %b) {
define void @test2(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
Expand Down Expand Up @@ -108,7 +108,7 @@ exit:

; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1.
define void @test3(i32* %a, i8* %b) {
define void @test3(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
Expand Down Expand Up @@ -142,7 +142,7 @@ exit:

; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
define void @test4(i32* %a, i32* %b) {
define void @test4(i32* %a, i32* %b) #0 {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
Expand Down Expand Up @@ -172,3 +172,5 @@ loop:
exit:
ret void
}

attributes #0 = { vscale_range(0, 16) }
17 changes: 9 additions & 8 deletions llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test1
; CHECK: <4 x i32>
define void @test1(i32* %a, i32* %b) {
define void @test1(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -90,7 +90,7 @@ exit:
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test2
; CHECK: <4 x i32>
define void @test2(i32* %a, i32* %b) {
define void @test2(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -138,7 +138,7 @@ exit:
; CHECK-DBG: LV: Using user VF vscale x 2.
; CHECK-LABEL: @test3
; CHECK: <vscale x 2 x i32>
define void @test3(i32* %a, i32* %b) {
define void @test3(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -190,7 +190,7 @@ exit:
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test4
; CHECK: <4 x i32>
define void @test4(i32* %a, i32* %b) {
define void @test4(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -238,7 +238,7 @@ exit:
; CHECK-DBG: LV: Using user VF vscale x 4
; CHECK-LABEL: @test5
; CHECK: <vscale x 4 x i32>
define void @test5(i32* %a, i32* %b) {
define void @test5(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -289,7 +289,7 @@ exit:
; CHECK-DBG: Selecting VF: vscale x 4.
; CHECK-LABEL: @test6
; CHECK: <vscale x 4 x i32>
define void @test6(i32* %a, i32* %b) {
define void @test6(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -322,7 +322,7 @@ exit:
; CHECK-NO-SVE-LABEL: @test_no_sve
; CHECK-NO-SVE: <4 x i32>
; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
define void @test_no_sve(i32* %a, i32* %b) {
define void @test_no_sve(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand Down Expand Up @@ -356,7 +356,7 @@ exit:
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test_no_max_vscale
; CHECK: <4 x i32>
define void @test_no_max_vscale(i32* %a, i32* %b) {
define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
entry:
br label %loop

Expand All @@ -378,6 +378,7 @@ exit:
ret void
}

attributes #0 = { vscale_range(0, 16) }
!21 = !{!21, !22, !23}
!22 = !{!"llvm.loop.vectorize.width", i32 4}
!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Loading

0 comments on commit ef198cd

Please sign in to comment.