Skip to content

Commit

Permalink
[AArch64] Force streaming-compatible codegen when attributes are set.
Browse files Browse the repository at this point in the history
Before this patch, the only way to generate streaming-compatible code
was to use the `-force-streaming-compatible-sve` flag, but the compiler
should also avoid the use of instructions invalid in streaming mode
when a function has the aarch64_pstate_sm_enabled/compatible attribute.

Reviewed By: paulwalker-arm, david-arm

Differential Revision: https://reviews.llvm.org/D155428
  • Loading branch information
sdesmalen-arm committed Jul 18, 2023
1 parent e972562 commit 08fd44b
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 36 deletions.
13 changes: 10 additions & 3 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
bool StreamingSVEModeDisabled)
bool StreamingSVEMode,
bool StreamingCompatibleSVEMode)
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
StreamingSVEModeDisabled(StreamingSVEModeDisabled),
StreamingSVEMode(StreamingSVEMode),
StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
Expand Down Expand Up @@ -479,5 +481,10 @@ bool AArch64Subtarget::isNeonAvailable() const {
if (!hasNEON())
return false;

return !ForceStreamingCompatibleSVE;
// The 'force-streaming-compatible-sve' flag overrides the streaming
// function attributes.
if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
return !ForceStreamingCompatibleSVE;

return !isStreaming() && !isStreamingCompatible();
}
16 changes: 11 additions & 5 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {

bool IsLittle;

bool StreamingSVEModeDisabled;
bool StreamingSVEMode;
bool StreamingCompatibleSVEMode;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
Expand Down Expand Up @@ -163,7 +164,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
StringRef FS, const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
bool StreamingSVEModeDisabled = true);
bool StreamingSVEMode = false,
bool StreamingCompatibleSVEMode = false);

// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
Expand Down Expand Up @@ -202,14 +204,20 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {

bool isXRaySupported() const override { return true; }

/// Returns true if this subtarget was created for a function that has the
/// "aarch64_pstate_sm_enabled" or the "aarch64_pstate_sm_body" attribute
/// (both set the StreamingSVEMode flag passed to the constructor).
bool isStreaming() const { return StreamingSVEMode; }

/// Returns true if this subtarget was created for a function that has the
/// streaming-compatible attribute ("aarch64_pstate_sm_compatible").
bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }

/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
bool isNeonAvailable() const;

unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0.
if (!isStreamingSVEModeDisabled())
if (StreamingSVEMode || StreamingCompatibleSVEMode)
return 0;
return MinVectorRegisterBitWidth;
}
Expand Down Expand Up @@ -416,8 +424,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
return "__security_check_cookie_arm64ec";
return "__security_check_cookie";
}

bool isStreamingSVEModeDisabled() const { return StreamingSVEModeDisabled; }
};
} // End llvm namespace

Expand Down
23 changes: 16 additions & 7 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,10 +391,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;

bool StreamingSVEModeDisabled =
!F.hasFnAttribute("aarch64_pstate_sm_enabled") &&
!F.hasFnAttribute("aarch64_pstate_sm_compatible") &&
!F.hasFnAttribute("aarch64_pstate_sm_body");
bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
F.hasFnAttribute("aarch64_pstate_sm_body");
bool StreamingCompatibleSVEMode =
F.hasFnAttribute("aarch64_pstate_sm_compatible");

unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
Expand Down Expand Up @@ -427,8 +427,11 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {

SmallString<512> Key;
raw_svector_ostream(Key) << "SVEMin" << MinSVEVectorSize << "SVEMax"
<< MaxSVEVectorSize << "StreamingSVEModeDisabled="
<< StreamingSVEModeDisabled << CPU << TuneCPU << FS;
<< MaxSVEVectorSize
<< "StreamingSVEMode=" << StreamingSVEMode
<< "StreamingCompatibleSVEMode="
<< StreamingCompatibleSVEMode << CPU << TuneCPU
<< FS;

auto &I = SubtargetMap[Key];
if (!I) {
Expand All @@ -438,8 +441,14 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
MaxSVEVectorSize, StreamingSVEModeDisabled);
MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
}

assert((!StreamingSVEMode || I->hasSME()) &&
"Expected SME to be available");
assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) &&
"Expected SVE or SME to be available");

return I.get();
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1925,8 +1925,7 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(64);
case TargetTransformInfo::RGK_FixedWidthVector:
if (!ST->isStreamingSVEModeDisabled() &&
!EnableFixedwidthAutovecInStreamingMode)
if (!ST->isNeonAvailable() && !EnableFixedwidthAutovecInStreamingMode)
return TypeSize::getFixed(0);

if (ST->hasSVE())
Expand All @@ -1935,7 +1934,8 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {

return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
if (!ST->isStreamingSVEModeDisabled() && !EnableScalableAutovecInStreamingMode)
if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
!EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);

return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
Expand Down
42 changes: 25 additions & 17 deletions llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,34 +123,42 @@ define void @streaming_compatible_caller_and_callee() "aarch64_pstate_sm_compati
define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind #0 {
; CHECK-LABEL: streaming_compatible_with_neon_vectors:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112
; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz x19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: bl normal_callee_vec_arg
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz x19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg)
%fadd = fadd <2 x double> %res, %arg
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-streaming-compatible-sve -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
; RUN: opt < %s -passes=loop-vectorize -force-streaming-compatible-sve -enable-fixedwidth-autovec-in-streaming-mode -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
; RUN: opt < %s -passes=loop-vectorize -mattr=+sve -force-target-instruction-cost=1 -scalable-vectorization=off -force-vector-interleave=1 -S 2>&1 | FileCheck %s --check-prefix=NO_SC_SVE

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
Expand Down

0 comments on commit 08fd44b

Please sign in to comment.