Skip to content

Commit

Permalink
[AArch64][SME] Create new pass to remove COALESCER_BARRIER early. (#8…
Browse files Browse the repository at this point in the history
…5386)

The purpose of the COALESCER_BARRIER pseudo node is to prevent the
register coalescer from coalescing certain COPY instructions around
smstart/smstop instructions, so that we spill only the (required) FPR
register rather than the encompassing ZPR register.

The pseudos are removed in the AArch64ExpandPseudo pass. However,
because the node itself is a _use_ of a register, this occassionally
leads to redundant spills/fills, because the register allocator thinks
the virtual register is actually used before an smstart/smstop
instruction, causing it to be filled, at which points it requires
immediate spilling again to ensure it stays live over the smstart/smstop
instruction.

We can avoid that by removing the pseudo nodes right after coalescing,
but before register allocation.
  • Loading branch information
sdesmalen-arm authored Apr 15, 2024
1 parent 32cb3c5 commit c3d5886
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 52 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64PointerAuthPass();
FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64MIPeepholeOptPass();
FunctionPass *createAArch64PostCoalescerPass();

FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

Expand Down Expand Up @@ -93,6 +94,7 @@ void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PostCoalescerPass(PassRegistry &);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
void initializeAArch64PostSelectOptimizePass(PassRegistry &);
Expand Down
101 changes: 101 additions & 0 deletions llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//===- AArch64PostCoalescerPass.cpp - AArch64 Post Coalescer pass ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-post-coalescer-pass"

namespace {

struct AArch64PostCoalescer : public MachineFunctionPass {
static char ID;

AArch64PostCoalescer() : MachineFunctionPass(ID) {
initializeAArch64PostCoalescerPass(*PassRegistry::getPassRegistry());
}

LiveIntervals *LIS;
MachineRegisterInfo *MRI;

bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
return "AArch64 Post Coalescer pass";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<LiveIntervals>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};

char AArch64PostCoalescer::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
"AArch64 Post Coalescer Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
"AArch64 Post Coalescer Pass", false, false)

bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (!FuncInfo->hasStreamingModeChanges())
return false;

MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
bool Changed = false;

for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : make_early_inc_range(MBB)) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::COALESCER_BARRIER_FPR16:
case AArch64::COALESCER_BARRIER_FPR32:
case AArch64::COALESCER_BARRIER_FPR64:
case AArch64::COALESCER_BARRIER_FPR128: {
Register Src = MI.getOperand(1).getReg();
Register Dst = MI.getOperand(0).getReg();
if (Src != Dst)
MRI->replaceRegWith(Dst, Src);

// MI must be erased from the basic block before recalculating the live
// interval.
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();

LIS->removeInterval(Src);
LIS->createAndComputeVirtRegInterval(Src);

Changed = true;
break;
}
}
}
}

return Changed;
}

FunctionPass *llvm::createAArch64PostCoalescerPass() {
return new AArch64PostCoalescer();
}
7 changes: 7 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PointerAuthPass(*PR);
initializeAArch64PostCoalescerPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerLoweringPass(*PR);
initializeAArch64PostSelectOptimizePass(*PR);
Expand Down Expand Up @@ -539,6 +540,7 @@ class AArch64PassConfig : public TargetPassConfig {
void addPreEmitPass() override;
void addPostBBSections() override;
void addPreEmitPass2() override;
bool addRegAssignAndRewriteOptimized() override;

std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
Expand Down Expand Up @@ -876,6 +878,11 @@ void AArch64PassConfig::addPreEmitPass2() {
addPass(createUnpackMachineBundles(nullptr));
}

bool AArch64PassConfig::addRegAssignAndRewriteOptimized() {
addPass(createAArch64PostCoalescerPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}

MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ add_llvm_target(AArch64CodeGen
AArch64MIPeepholeOpt.cpp
AArch64MCInstLower.cpp
AArch64PointerAuth.cpp
AArch64PostCoalescerPass.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@
; CHECK-NEXT: Register Coalescer
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
; CHECK-NEXT: AArch64 Post Coalescer pass
; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: Debug Variable Analysis
; CHECK-NEXT: Live Stack Slot Analysis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
; CHECK-REGALLOC: bb.0 (%ir-block.0):
; CHECK-REGALLOC-NEXT: liveins: $q0
; CHECK-REGALLOC-NEXT: {{ $}}
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
Expand Down Expand Up @@ -61,7 +60,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
Expand Down Expand Up @@ -94,17 +92,13 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
; CHECK-REGALLOC: bb.0 (%ir-block.0):
; CHECK-REGALLOC-NEXT: liveins: $q0
; CHECK-REGALLOC-NEXT: {{ $}}
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ define void @streaming_compatible_arg(float %f) #0 {
; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,9 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
Expand Down Expand Up @@ -392,9 +392,9 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
Expand Down Expand Up @@ -422,9 +422,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: and x19, x0, #0x1
; CHECK-COMMON-NEXT: stp s2, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,6 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1116,7 +1115,6 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1151,7 +1149,6 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1183,7 +1180,6 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1215,7 +1211,6 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1247,7 +1242,6 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1311,7 +1305,6 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down Expand Up @@ -1343,7 +1336,6 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,27 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: tbnz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
ret float zeroinitializer
}
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-streaming-body.ll
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl cos
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,7 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: mov x9, x0
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: ldp s4, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: stp s4, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: ldp d4, d0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: stp d4, d0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB10_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
Original file line number Diff line number Diff line change
Expand Up @@ -405,11 +405,11 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: stp s1, s0, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: stp d3, d2, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldp s1, s0, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp d3, d2, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: bl bar
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
Expand Down
Loading

0 comments on commit c3d5886

Please sign in to comment.