diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8a6f1dc7487ba..7b9b6a7a42812 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7609,16 +7609,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs); if (RequiresLazySave) { - SDValue NumZaSaveSlices; - if (!CalleeAttrs.preservesZA()) { - // Set up a lazy save mechanism by storing the runtime live slices - // (worst-case SVL) to the TPIDR2 stack object. - NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, - DAG.getConstant(1, DL, MVT::i32)); - } else if (CalleeAttrs.preservesZA()) { - NumZaSaveSlices = DAG.getConstant(0, DL, MVT::i64); - } - unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj(); MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj); SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj, @@ -7626,6 +7616,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue NumZaSaveSlicesAddr = DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr, DAG.getConstant(8, DL, TPIDR2ObjAddr.getValueType())); + SDValue NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, + DAG.getConstant(1, DL, MVT::i32)); Chain = DAG.getTruncStore(Chain, DL, NumZaSaveSlices, NumZaSaveSlicesAddr, MPI, MVT::i16); Chain = DAG.getNode( @@ -7638,14 +7630,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, CLI.CB) : OptimizationRemarkAnalysis("sme", "SMELazySaveZA", &MF.getFunction()); - DescribeCallsite(R) << " sets up a lazy save for ZA"; - if (CalleeAttrs.preservesZA()) - R << ", but callee preserves ZA, so we request 0 slices to be saved"; - else - R << ", and we request that all slices be saved"; - R << ore::setExtraArgs() - << ore::NV("CalleePreservesZA", CalleeAttrs.preservesZA()); - return R; + return DescribeCallsite(R) << " sets up a lazy save for ZA"; }); } @@ -8081,34 +8066,33 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } if (RequiresLazySave) { - if (!CalleeAttrs.preservesZA()) { - // Unconditionally resume ZA. - Result = DAG.getNode( - AArch64ISD::SMSTART, DL, MVT::Other, Result, - DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64)); - - // Conditionally restore the lazy save using a pseudo node. - unsigned FI = FuncInfo->getLazySaveTPIDR2Obj(); - SDValue RegMask = DAG.getRegisterMask( - TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); - SDValue RestoreRoutine = DAG.getTargetExternalSymbol( - "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout())); - SDValue TPIDR2_EL0 = DAG.getNode( - ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result, - DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32)); - - // Copy the address of the TPIDR2 block into X0 before 'calling' the - // RESTORE_ZA pseudo. - SDValue Glue; - SDValue TPIDR2Block = DAG.getFrameIndex( - FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); - Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue); - Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other, - {Result, TPIDR2_EL0, - DAG.getRegister(AArch64::X0, MVT::i64), - RestoreRoutine, RegMask, Result.getValue(1)}); - } + // Unconditionally resume ZA. + Result = DAG.getNode( + AArch64ISD::SMSTART, DL, MVT::Other, Result, + DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRZA), DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64)); + + // Conditionally restore the lazy save using a pseudo node. + unsigned FI = FuncInfo->getLazySaveTPIDR2Obj(); + SDValue RegMask = DAG.getRegisterMask( + TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); + SDValue RestoreRoutine = DAG.getTargetExternalSymbol( + "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout())); + SDValue TPIDR2_EL0 = DAG.getNode( + ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result, + DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32)); + + // Copy the address of the TPIDR2 block into X0 before 'calling' the + // RESTORE_ZA pseudo. + SDValue Glue; + SDValue TPIDR2Block = DAG.getFrameIndex( + FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); + Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue); + Result = + DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other, + {Result, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64), + RestoreRoutine, RegMask, Result.getValue(1)}); + // Finally reset the TPIDR2_EL0 register to 0. Result = DAG.getNode( ISD::INTRINSIC_VOID, DL, MVT::Other, Result, diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll index 6762a768fd5bd..d999311301f94 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call-remarks.ll @@ -2,31 +2,24 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sme --pass-remarks-analysis=sme -o /dev/null < %s 2>&1 | FileCheck %s declare void @private_za_callee() -declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved" declare float @llvm.cos.f32(float) define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" { -; CHECK: remark: :0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved +; CHECK: remark: :0:0: call from 'test_lazy_save_1_callee' to 'private_za_callee' sets up a lazy save for ZA call void @private_za_callee() ret void } define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" { -; CHECK: remark: :0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved +; CHECK: remark: :0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA call void @private_za_callee() -; CHECK: remark: :0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA, and we request that all slices be saved +; CHECK: remark: :0:0: call from 'test_lazy_save_2_callees' to 'private_za_callee' sets up a lazy save for ZA call void @private_za_callee() ret void } -define void @test_lazy_save_preserved_callee() nounwind "aarch64_pstate_za_shared" { -; CHECK: remark: :0:0: call from 'test_lazy_save_preserved_callee' to 'private_za_preserved_callee' sets up a lazy save for ZA, but callee preserves ZA, so we request 0 slices to be saved - call void @private_za_preserved_callee() - ret void -} - define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" { -; CHECK: remark: :0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA, and we request that all slices be saved +; CHECK: remark: :0:0: call from 'test_lazy_save_expanded_intrinsic' to 'cosf' sets up a lazy save for ZA %res = call float @llvm.cos.f32(float %a) ret float %res } diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll index a173fd40feb28..9625e139bd0bc 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -2,7 +2,6 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sme < %s | FileCheck %s declare void @private_za_callee() -declare void @private_za_preserved_callee() "aarch64_pstate_za_preserved" declare float @llvm.cos.f32(float) ; Test lazy-save mechanism for a single callee. @@ -170,48 +169,3 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z call void @private_za_callee() ret void } - - -; Test lazy-save mechanism for an aarch64_pstate_za_shared caller -; calling a callee with aarch64_pstate_za_preserved. -define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" { -; CHECK-LABEL: za_shared_caller_za_preserved_callee: -; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: rdsvl x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 -; CHECK-NEXT: mov sp, x8 -; CHECK-NEXT: sub x9, x29, #80 -; CHECK-NEXT: stp x8, xzr, [x29, #-80] -; CHECK-NEXT: msr TPIDR2_EL0, x9 -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: tbz w19, #0, .LBB4_2 -; CHECK-NEXT: // %bb.1: -; CHECK-NEXT: smstop sm -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: bl private_za_preserved_callee -; CHECK-NEXT: tbz w19, #0, .LBB4_4 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: smstart sm -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: msr TPIDR2_EL0, xzr -; CHECK-NEXT: sub sp, x29, #64 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload -; CHECK-NEXT: ret - call void @private_za_preserved_callee() - ret void -}