-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Fix stack probing clobbering flags #81879
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) Changes: Certain stack probing sequences might clobber flags, so we can't use a block as a prologue if the flags register is a live-in on entry to that block. Full diff: https://github.com/llvm/llvm-project/pull/81879.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 2d0ca6e6d0d3fc..b7bf9db906371a 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1058,9 +1058,14 @@ bool AArch64FrameLowering::canUseAsPrologue(
return false;
}
+ // Certain stack probing sequences might clobber flags, then we can't use
+ // the block as a prologue if the flags register is a live-in.
+ if (TLI->hasInlineStackProbe(*MF) && MBB.isLiveIn(AArch64::NZCV))
+ return false;
+
// Don't need a scratch register if we're not going to re-align the stack or
// emit stack probes.
- if (!RegInfo->hasStackRealignment(*MF) && TLI->hasInlineStackProbe(*MF))
+ if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir
new file mode 100644
index 00000000000000..f50bd9ab4b8a1b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-probing-no-scratch-reg.mir
@@ -0,0 +1,105 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc %s --start-before=shrink-wrap -stop-after=prologepilog -o - | FileCheck %s
+--- |
+ target triple = "aarch64-linux"
+
+ define void @f(i32 %n) #0 {
+ entry:
+ %a = alloca i8, i32 150000, align 8
+ %c0 = icmp sle i32 %n, 1
+ br i1 %c0, label %if.then1, label %exit
+
+ if.then1:
+ call void @g(ptr %a)
+ br label %exit
+
+ exit:
+ ret void
+ }
+
+ declare void @g(...)
+
+ attributes #0 = { nounwind "probe-stack"="inline-asm" "stack-probe-size"="4096" }
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ localFrameSize: 150000
+stack:
+ - { id: 0, name: a, type: default, offset: 0, size: 150000, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -150000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: f
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $lr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+ ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 36, 12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.entry:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+ ; CHECK-NEXT: frame-setup Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x25, $x27, $x28
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2544, 0
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: $x9 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.if.then1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x0 = ADDXri $sp, 0, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.exit:
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 36, 12
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2544, 0
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $w0
+
+ $x9 = IMPLICIT_DEF
+ dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 12, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1.if.then1:
+ successors: %bb.2(0x80000000)
+ liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x23, $x25, $x25, $x27, $x28
+
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $x0 = ADDXri %stack.0.a, 0, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.2.exit:
+ RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AArch64/stack-probing-shrink-wrap.mir b/llvm/test/CodeGen/AArch64/stack-probing-shrink-wrap.mir
new file mode 100644
index 00000000000000..83aa90d389a4a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-probing-shrink-wrap.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc %s --start-before=shrink-wrap --stop-after=prologepilog -o - | FileCheck %s
+--- |
+ target triple = "aarch64-linux"
+
+ define void @f(i32 %n) #0 {
+ entry:
+ %a = alloca i8, i32 150000, align 8
+ %c0 = icmp sle i32 %n, 1
+ br i1 %c0, label %if.then1, label %exit
+
+ if.then1: ; preds = %entry
+ %0 = icmp sle i32 %n, 1
+ %v = select i1 %0, i32 0, i32 1
+ call void @g(ptr %a, i32 %v)
+ br label %exit
+
+ exit: ; preds = %if.then1, %entry
+ ret void
+ }
+
+ declare void @g(...)
+
+ attributes #0 = { nounwind "probe-stack"="inline-asm" "stack-probe-size"="4096" }
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ localFrameSize: 150000
+stack:
+ - { id: 0, name: a, type: default, offset: 0, size: 150000, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -150000, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: f
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $w0, $lr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.2), (store (s64) into %stack.1)
+ ; CHECK-NEXT: $x9 = frame-setup SUBXri $sp, 36, 12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.entry:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $w0, $x9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1, 12
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: $xzr = frame-setup SUBSXrx64 $sp, $x9, 24, implicit-def $nzcv
+ ; CHECK-NEXT: frame-setup Bcc 1, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2544, 0
+ ; CHECK-NEXT: frame-setup STRXui $xzr, $sp, 0
+ ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.if.then1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $w1 = CSINCWr $wzr, $wzr, 13, implicit killed $nzcv
+ ; CHECK-NEXT: $x0 = ADDXri $sp, 0, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $w1, implicit-def $sp
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.exit:
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 36, 12
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2544, 0
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.1)
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $w0
+
+ dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv
+ Bcc 12, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1.if.then1:
+ successors: %bb.2(0x80000000)
+ liveins: $nzcv
+
+ renamable $w1 = CSINCWr $wzr, $wzr, 13, implicit killed $nzcv
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $x0 = ADDXri %stack.0.a, 0, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $w1, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+ bb.2.exit:
+ RET_ReallyLR
+
+...
|
Certain stack probing sequences might clobber flags, so we can't use a block as a prologue if the flags register is a live-in on entry to that block.
625d755
to
965cec4
Compare
The difference now is that the condition would trigger for Windows, where stack probing does clobber flags.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Certain stack probing sequences might clobber flags, then we can't use a
block as a prologue if the flags register is a live-in on entry to that
block.