Skip to content

Commit

Permalink
[AArch64][SVE] Allow accesses to SVE stack objects to use frame pointer
Browse files Browse the repository at this point in the history
The layout of the stack frame for SVE means that using the frame pointer
rather than the stack pointer for an access to an SVE stack object
removes the need for an additional add to jump over the non-SVE objects.

Likewise the opposite is true for non-SVE stack objects.

This patch allows for the former to be done by having HasFP return true
in the presence of both SVE and non-SVE stack objects, and also fixes a
minor issue whereby the later would not be done for certain offsets.
  • Loading branch information
brads55 committed Jan 28, 2021
1 parent 0805e40 commit 4263585
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 52 deletions.
17 changes: 13 additions & 4 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// Win64 EH requires a frame pointer if funclets are present, as the locals
// are accessed off the frame pointer in both the parent function and the
Expand All @@ -320,6 +321,14 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
if (!MFI.isMaxCallFrameSizeComputed() ||
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
return true;
// If there are both SVE and non-SVE objects on the stack, make the frame
// pointer available since it may be more performant to use it.
uint64_t CalleeStackSize = AFI->isCalleeSavedStackSizeComputed()
? AFI->getCalleeSavedStackSize()
: 0;
uint64_t NonSVEStackSize = MFI.getStackSize() - CalleeStackSize;
if (AFI->getStackSizeSVE() && NonSVEStackSize)
return true;

return false;
}
Expand Down Expand Up @@ -1883,10 +1892,6 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
// right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame() && !isSVE) {
// We shouldn't prefer using the FP when there is an SVE area
// in between the FP and the non-SVE locals/spills.
PreferFP &= !SVEStackSize;

// Note: Keeping the following as multiple 'if' statements rather than
// merging to a single expression for readability.
//
Expand All @@ -1907,6 +1912,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool FPOffsetFits = !ForSimm || FPOffset >= -256;
PreferFP |= Offset > -FPOffset;

// The FP offset will not fit if there is an SVE area in the way.
if (SVEStackSize && FPOffset < 0)
FPOffsetFits = false;

if (MFI.hasVarSizedObjects()) {
// If we have variable sized objects, we can use either FP or BP, as the
// SP offset is unknown. We can use the base pointer if we have one and
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
return getCalleeSavedStackSize();
}

bool isCalleeSavedStackSizeComputed() const {
return HasCalleeSavedStackSize;
}

unsigned getCalleeSavedStackSize() const {
assert(HasCalleeSavedStackSize &&
"CalleeSavedStackSize has not been calculated");
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@
# CHECK1: : DW_OP_breg31 WSP+16)
# CHECK1: DW_AT_type {{.*}}ty32
#
# CHECK2: : DW_OP_breg31 WSP+16, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
# CHECK2: : DW_OP_breg29 W29+0, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
# CHECK2: DW_AT_type {{.*}}svint32_t
#
# CHECK3: : DW_OP_breg31 WSP+16, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
# CHECK3: : DW_OP_breg29 W29+0, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
# CHECK3: DW_AT_type {{.*}}svint32_t
#
# CHECK4: : DW_OP_breg31 WSP+16, DW_OP_lit7, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
# CHECK4: : DW_OP_breg29 W29+0, DW_OP_lit17, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
# CHECK4: DW_AT_type {{.*}}svbool_t
#
# CHECK5: : DW_OP_breg31 WSP+16, DW_OP_lit6, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
# CHECK5: : DW_OP_breg29 W29+0, DW_OP_lit18, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
# CHECK5: DW_AT_type {{.*}}svbool_t

--- |
Expand Down
59 changes: 23 additions & 36 deletions llvm/test/CodeGen/AArch64/framelayout-sve.mir
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

# CHECK: bb.0.entry:
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION
Expand All @@ -67,11 +68,9 @@
# CHECK-NEXT: RET_ReallyLR

# ASM-LABEL: test_allocate_sve:
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_offset w29, -16
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_offset: reg29 -16
name: test_allocate_sve
stack:
- { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 }
Expand All @@ -96,6 +95,7 @@ body: |
# CHECK: bb.0.entry:
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32
# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-COUNT-4: frame-setup CFI_INSTRUCTION
Expand All @@ -109,13 +109,11 @@ body: |
# CHECK-NEXT: RET_ReallyLR
#
# ASM-LABEL: test_allocate_sve_gpr_callee_saves:
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
# ASM-NEXT: .cfi_offset w20, -8
# ASM: .cfi_offset w20, -8
# ASM-NEXT: .cfi_offset w21, -16
# ASM-NEXT: .cfi_offset w29, -32
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -8
# UNWINDINFO: DW_CFA_offset: reg20 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32
name: test_allocate_sve_gpr_callee_saves
Expand Down Expand Up @@ -184,28 +182,24 @@ body: |

# CHECK: bb.0.entry:
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION

# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7
# CHECK-NEXT: STR_ZXI $z0, $fp, -1
# CHECK-NEXT: STR_ZXI $z1, $fp, -2
# CHECK-NEXT: STR_PXI $p0, $fp, -17

# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
# CHECK-NEXT: RET_ReallyLR
#
# ASM-LABEL: test_address_sve:
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_offset w29, -16
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_offset: reg29 -16

name: test_address_sve
frameInfo:
Expand Down Expand Up @@ -298,24 +292,22 @@ body: |

# CHECK: bb.0.entry:
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION

# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
# CHECK-NEXT: $x0 = LDRXui $fp, 2

# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
# CHECK-NEXT: RET_ReallyLR
#
# ASM-LABEL: test_stack_arg_sve:
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_offset w29, -16
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_offset: reg29 -16

name: test_stack_arg_sve
fixedStack:
Expand Down Expand Up @@ -460,11 +452,9 @@ body: |
# CHECK: RET_ReallyLR
#
# ASM-LABEL: save_restore_pregs_sve:
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_offset w29, -16
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_offset: reg29 -16
name: save_restore_pregs_sve
stack:
- { id: 0, stack-id: default, size: 32, alignment: 16 }
Expand All @@ -480,6 +470,7 @@ body: |
...
# CHECK-LABEL: name: save_restore_zregs_sve
# CHECK: $sp = frame-setup STRXpre killed $fp, $sp, -16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
# CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0
# CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1
Expand All @@ -496,13 +487,11 @@ body: |
# CHECK-NEXT: RET_ReallyLR
#
# ASM-LABEL: save_restore_zregs_sve:
# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG

# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
Expand Down Expand Up @@ -558,8 +547,7 @@ body: |
# CHECK: RET_ReallyLR
#
# ASM-LABEL: save_restore_sve:
# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
# ASM: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG
Expand All @@ -572,8 +560,7 @@ body: |
# ASM-NEXT: .cfi_offset w21, -24
# ASM-NEXT: .cfi_offset w29, -32
#
# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
Expand Down
15 changes: 7 additions & 8 deletions llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,27 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-LABEL: foo2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16 // =16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16 // =16
; CHECK-NEXT: add x9, sp, #16 // =16
; CHECK-NEXT: addvl x8, x29, #-4
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: st1d { z16.d }, p0, [x29, #-4, mul vl]
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
; CHECK-NEXT: mov w1, #1
; CHECK-NEXT: mov w2, #2
; CHECK-NEXT: mov w3, #3
; CHECK-NEXT: mov w4, #4
; CHECK-NEXT: mov w5, #5
; CHECK-NEXT: mov w6, #6
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: str x8, [sp, #-16]!
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee2
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: add sp, sp, #16 // =16
Expand Down

0 comments on commit 4263585

Please sign in to comment.