diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e667d0cca19f7..9cda43e58d27a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCUtil.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -7362,6 +7363,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, else if (auto *ES = dyn_cast(CLI.Callee)) CalleeAttrs = SMEAttrs(ES->getSymbol()); + auto DescribeCallsite = + [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & { + R << "call from '" << ore::NV("Caller", MF.getName()) << "' to '"; + if (auto *ES = dyn_cast(CLI.Callee)) + R << ore::NV("Callee", ES->getSymbol()); + else if (CLI.CB && CLI.CB->getCalledFunction()) + R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName()); + else + R << "unknown callee"; + R << "'"; + return R; + }; + bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs); if (RequiresLazySave) { SDValue NumZaSaveSlices; @@ -7387,13 +7401,38 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, ISD::INTRINSIC_VOID, DL, MVT::Other, Chain, DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32), TPIDR2ObjAddr); + OptimizationRemarkEmitter ORE(&MF.getFunction()); + ORE.emit([&]() { + auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMELazySaveZA", + CLI.CB) + : OptimizationRemarkAnalysis("sme", "SMELazySaveZA", + &MF.getFunction()); + DescribeCallsite(R) << " sets up a lazy save for ZA"; + if (CalleeAttrs.preservesZA()) + R << ", but callee preserves ZA, so we request 0 slices to be saved"; + else + R << ", and we request that all slices be saved"; + R << ore::setExtraArgs() + << ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA()); + return R; + }); } SDValue PStateSM; std::optional RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs); - if (RequiresSMChange) + if (RequiresSMChange) { PStateSM = getPStateSM(DAG, Chain, CallerAttrs, DL, MVT::i64); + OptimizationRemarkEmitter ORE(&MF.getFunction()); + ORE.emit([&]() { + auto R = CLI.CB ? OptimizationRemarkAnalysis("sme", "SMETransition", + CLI.CB) + : OptimizationRemarkAnalysis("sme", "SMETransition", + &MF.getFunction()); + DescribeCallsite(R) << " requires a streaming mode transition"; + return R; + }); + } // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll new file mode 100644 index 0000000000000..6762a768fd5bd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s + +declare void @private_za_callee() +declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved" +declare float @llvm.cos.f32(float) + +define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" { +; CHECK: remark: :0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved + call void @private_za_callee() + ret void +} + +define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" { +; CHECK: remark: :0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved + call void @private_za_callee() +; CHECK: remark: :0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved + call void @private_za_callee() + ret void +} + +define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" { +; CHECK: remark: :0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved + call void @private_za_preserved_callee() + ret void +} + +define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" { +; CHECK: remark: :0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved + %res = call float @llvm.cos.f32(float %a) + ret float %res +} diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll new file mode 100644 index 0000000000000..e1a474d898233 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface-remarks.ll @@ -0,0 +1,90 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve -verify-machineinstrs --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s + +declare void @normal_callee() +declare void @streaming_callee() "aarch64_pstate_sm_enabled" +declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" + +; CHECK: remark: :0:0: call from 'normal_caller_streaming_callee' to 'streaming_callee' requires a streaming mode transition +define void @normal_caller_streaming_callee() nounwind { + call void @streaming_callee() + ret void; +} + +; CHECK: remark: :0:0: call from 'streaming_caller_normal_callee' to 'normal_callee' requires a streaming mode transition +define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" { + call void @normal_callee() + ret void; +} + +; CHECK-NOT: streaming_caller_streaming_callee +define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" { + call void @streaming_callee() + ret void; +} + +; CHECK-NOT: streaming_caller_streaming_compatible_callee +define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" { + call void @streaming_compatible_callee() + ret void; +} + +; CHECK: remark: :0:0: call from 'call_to_function_pointer_streaming_enabled' to 'unknown callee' requires a streaming mode transition +define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind { + call void %p() "aarch64_pstate_sm_enabled" + ret void +} + +; CHECK: remark: :0:0: call from 'smstart_clobber_simdfp' to 'streaming_callee' requires a streaming mode transition +define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind { + call void @streaming_callee() + ret <4 x i32> %x; +} + +; CHECK: remark: :0:0: call from 'smstart_clobber_sve' to 'streaming_callee' requires a streaming mode transition +define @smstart_clobber_sve( %x) nounwind { + call void @streaming_callee() + ret %x; +} + +; CHECK: remark: :0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition +; CHECK: remark: :0:0: call from 'smstart_clobber_sve_duplicate' to 'streaming_callee' requires a streaming mode transition +define @smstart_clobber_sve_duplicate( %x) nounwind { + call void @streaming_callee() + call void @streaming_callee() + ret %x; +} + +; CHECK: remark: :0:0: call from 'call_to_intrinsic_without_chain' to 'cos' requires a streaming mode transition +define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" { +entry: + %res = call fast double @llvm.cos.f64(double %x) + %res.fadd = fadd fast double %res, %x + ret double %res.fadd +} + +declare double @llvm.cos.f64(double) + +; CHECK: remark: :0:0: call from 'disable_tailcallopt' to 'streaming_callee' requires a streaming mode transition +define void @disable_tailcallopt() nounwind { + tail call void @streaming_callee() + ret void; +} + +; CHECK: remark: :0:0: call from 'call_to_non_streaming_pass_sve_objects' to 'foo' requires a streaming mode transition +define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 { +entry: + %Data1 = alloca , align 16 + %Data2 = alloca , align 16 + %Data3 = alloca , align 16 + %0 = tail call i64 @llvm.aarch64.sme.cntsb() + call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0) + %1 = load , ptr %Data1, align 16 + %vecext = extractelement %1, i64 0 + ret i8 %vecext +} + +declare i64 @llvm.aarch64.sme.cntsb() + +declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef) + +attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }