Skip to content

Commit

Permalink
[AArch64][SME] Add remarks to flag lazy ZA saves, and SMSTART/SMSTOP …
Browse files Browse the repository at this point in the history
…transitions (#68255)
  • Loading branch information
jroelofs committed Oct 6, 2023
1 parent ffdae1a commit 2c0b6f2
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 1 deletion.
41 changes: 40 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
Expand Down Expand Up @@ -7362,6 +7363,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
CalleeAttrs = SMEAttrs(ES->getSymbol());

// Helper that streams "call from '<caller>' to '<callee>'" onto the remark R
// and returns R so the caller can keep appending text to the same remark.
// The caller name comes from MF.getName(). The callee name is chosen as
// follows:
//   1. the symbol of CLI.Callee when it is an ExternalSymbolSDNode;
//   2. otherwise the name of the function called by CLI.CB, when CLI.CB is
//      non-null and getCalledFunction() returns a function;
//   3. otherwise the literal text "unknown callee".
auto DescribeCallsite =
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
R << "call from '" << ore::NV("Caller", MF.getName()) << "' to '";
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
R << ore::NV("Callee", ES->getSymbol());
else if (CLI.CB && CLI.CB->getCalledFunction())
R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName());
else
R << "unknown callee";
// Close the quote opened before the callee name.
R << "'";
return R;
};

bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs);
if (RequiresLazySave) {
SDValue NumZaSaveSlices;
Expand All @@ -7387,13 +7401,38 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
TPIDR2ObjAddr);
// Emit an analysis remark (pass name "sme", remark name "SMELazySaveZA") for
// this call. The remark is built inside the callback passed to ORE.emit().
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
// Attach the remark to the call instruction when one is available
// (CLI.CB non-null); otherwise attach it to the enclosing function.
auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
CLI.CB)
: OptimizationRemarkAnalysis("sme", "SMELazySaveZA",
&MF.getFunction());
DescribeCallsite(R) << " sets up a lazy save for ZA";
// The message tail depends on whether the callee preserves ZA.
if (CalleeAttrs.preservesZA())
R << ", but callee preserves ZA, so we request 0 slices to be saved";
else
R << ", and we request that all slices be saved";
// NOTE(review): per the ORE API, values streamed after ore::setExtraArgs()
// are presumably kept out of the printed message and only serialized with
// the remark -- confirm against the DiagnosticInfo documentation.
R << ore::setExtraArgs()
<< ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA());
return R;
});
}

SDValue PStateSM;
std::optional<bool> RequiresSMChange =
CallerAttrs.requiresSMChange(CalleeAttrs);
if (RequiresSMChange)
if (RequiresSMChange) {
PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64);
// Emit an analysis remark (pass name "sme", remark name "SMETransition") for
// this call. The remark is built inside the callback passed to ORE.emit().
OptimizationRemarkEmitter ORE(&MF.getFunction());
ORE.emit([&]() {
// Attach the remark to the call instruction when one is available
// (CLI.CB non-null); otherwise attach it to the enclosing function.
auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition",
CLI.CB)
: OptimizationRemarkAnalysis("sme", "SMETransition",
&MF.getFunction());
DescribeCallsite(R) << " requires a streaming mode transition";
return R;
});
}

// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; NOTE: The remark checks in this file are hand-written; do not regenerate them with utils/update_llc_test_checks.py.
; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s

declare void @private_za_callee()
declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved"
declare float @llvm.cos.f32(float)

; A caller marked "aarch64_pstate_za_shared" makes one call to
; @private_za_callee, which is declared without any ZA attribute. A single
; lazy-save remark is expected, stating that all slices are requested.
define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
call void @private_za_callee()
ret void
}

; Same caller/callee attributes as the single-call test, but with two
; consecutive calls. One lazy-save remark is expected per call.
define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
call void @private_za_callee()
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved
call void @private_za_callee()
ret void
}

; The callee is declared with "aarch64_pstate_za_preserved". The expected
; remark still reports a lazy save, but says 0 slices are requested because
; the callee preserves ZA.
define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" {
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved
call void @private_za_preserved_callee()
ret void
}

; Calls the @llvm.cos.f32 intrinsic from a "aarch64_pstate_za_shared" caller.
; The expected remark names the callee as 'cosf' rather than the intrinsic
; (presumably because the intrinsic is expanded to a library call -- confirm).
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" {
; CHECK: remark: <unknown>:0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved
%res = call float @llvm.cos.f32(float %a)
ret float %res
}
90 changes: 90 additions & 0 deletions llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve -verify-machineinstrs --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s

declare void @normal_callee()
declare void @streaming_callee() "aarch64_pstate_sm_enabled"
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"

; A caller with no streaming attribute calls @streaming_callee, which is
; declared "aarch64_pstate_sm_enabled". A streaming mode transition remark is
; expected.
; CHECK: remark: <unknown>:0:0: call from 'normal_caller_streaming_callee' to 'streaming_callee' requires a streaming mode transition
define void @normal_caller_streaming_callee() nounwind {
call void @streaming_callee()
ret void;
}

; The reverse direction: an "aarch64_pstate_sm_enabled" caller calls
; @normal_callee, which is declared without streaming attributes. A streaming
; mode transition remark is expected.
; CHECK: remark: <unknown>:0:0: call from 'streaming_caller_normal_callee' to 'normal_callee' requires a streaming mode transition
define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
call void @normal_callee()
ret void;
}

; Caller and callee are both "aarch64_pstate_sm_enabled". The negative check
; below requires that no output mentions this function, i.e. no remark is
; emitted for it.
; CHECK-NOT: streaming_caller_streaming_callee
define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
call void @streaming_callee()
ret void;
}

; An "aarch64_pstate_sm_enabled" caller calls a callee declared
; "aarch64_pstate_sm_compatible". The negative check below requires that no
; output mentions this function, i.e. no remark is emitted for it.
; CHECK-NOT: streaming_caller_streaming_compatible_callee
define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
call void @streaming_compatible_callee()
ret void;
}

; Indirect call through the pointer argument %p, with
; "aarch64_pstate_sm_enabled" given as a call-site attribute. There is no
; named callee, so the expected remark uses the text 'unknown callee'.
; CHECK: remark: <unknown>:0:0: call from 'call_to_function_pointer_streaming_enabled' to 'unknown callee' requires a streaming mode transition
define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
call void %p() "aarch64_pstate_sm_enabled"
ret void
}

; A non-streaming caller returns its <4 x i32> argument unchanged after
; calling @streaming_callee, so %x is used on both sides of the call. A
; streaming mode transition remark is expected.
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_simdfp' to 'streaming_callee' requires a streaming mode transition
define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
call void @streaming_callee()
ret <4 x i32> %x;
}

; Same shape as the fixed-width vector test, but the value used on both sides
; of the call is a scalable <vscale x 4 x i32> vector. A streaming mode
; transition remark is expected.
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve' to 'streaming_callee' requires a streaming mode transition
define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
call void @streaming_callee()
ret <vscale x 4 x i32> %x;
}

; Two back-to-back calls to @streaming_callee from a non-streaming caller.
; Two identical remarks are expected, one per call.
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition
; CHECK: remark: <unknown>:0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition
define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
call void @streaming_callee()
call void @streaming_callee()
ret <vscale x 4 x i32> %x;
}

; An "aarch64_pstate_sm_enabled" caller invokes the @llvm.cos.f64 intrinsic
; with fast-math flags and adds the result to %x. The expected remark names
; the callee as 'cos' rather than the intrinsic (presumably the library
; function the intrinsic is lowered to -- confirm).
; CHECK: remark: <unknown>:0:0: call from 'call_to_intrinsic_without_chain' to 'cos' requires a streaming mode transition
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
entry:
%res = call fast double @llvm.cos.f64(double %x)
%res.fadd = fadd fast double %res, %x
ret double %res.fadd
}

declare double @llvm.cos.f64(double)

; The call to @streaming_callee is marked 'tail call' and made from a
; non-streaming caller. A streaming mode transition remark is still expected.
; CHECK: remark: <unknown>:0:0: call from 'disable_tailcallopt' to 'streaming_callee' requires a streaming mode transition
define void @disable_tailcallopt() nounwind {
tail call void @streaming_callee()
ret void;
}

; The caller uses attribute group #0 (nounwind, vscale_range(1,16),
; "aarch64_pstate_sm_enabled"). It allocates three <vscale x 16 x i8> stack
; objects and passes their addresses, plus the result of
; @llvm.aarch64.sme.cntsb, to @foo, which is declared without streaming
; attributes. It then loads %Data1 and returns element 0. A streaming mode
; transition remark is expected for the call to @foo.
; CHECK: remark: <unknown>:0:0: call from 'call_to_non_streaming_pass_sve_objects' to 'foo' requires a streaming mode transition
define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
entry:
%Data1 = alloca <vscale x 16 x i8>, align 16
%Data2 = alloca <vscale x 16 x i8>, align 16
%Data3 = alloca <vscale x 16 x i8>, align 16
%0 = tail call i64 @llvm.aarch64.sme.cntsb()
call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
%1 = load <vscale x 16 x i8>, ptr %Data1, align 16
%vecext = extractelement <vscale x 16 x i8> %1, i64 0
ret i8 %vecext
}

declare i64 @llvm.aarch64.sme.cntsb()

declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)

attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }

0 comments on commit 2c0b6f2

Please sign in to comment.