Skip to content

Commit

Permalink
[AArch64][PtrAuth] Fix unwind state for tail calls
Browse files Browse the repository at this point in the history
When generating unwind tables for code which uses return-address
signing, we need to toggle the RA_SIGN_STATE DWARF register around any
tail-calls, because these require the return address to be authenticated
before the call, and could throw an exception. This is done using the
.cfi_negate_ra_state directive before the call, and .cfi_restore_state
at the start of the next basic block.

However, since D153098, the .cfi_restore_state isn't being inserted,
because the CFIFixup pass isn't being run. This re-enables that pass
when return-address signing is enabled.

Reviewed By: ikudrin, MaskRay

Differential Revision: https://reviews.llvm.org/D156428
  • Loading branch information
ostannard committed Aug 3, 2023
1 parent b3b2a92 commit f2e7285
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 29 deletions.
13 changes: 8 additions & 5 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1908,6 +1908,7 @@ static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB,
return;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
bool EmitAsyncCFI = MFI.needsAsyncDwarfUnwindInfo(MF);

MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
Expand All @@ -1933,11 +1934,13 @@ static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB,
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);

unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
if (EmitAsyncCFI) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
if (NeedsWinCFI) {
*HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
# CHECK: BL @[[OUTLINED_FUNCTION]]
# CHECK: bb.5:
# CHECK: frame-destroy AUTIBSP
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state
# CHECK-NEXT: RET
name: foo
tracksRegLiveness: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ define i32 @a() #0 {
; CHECK-NEXT: .cfi_negate_ra_state
; V8A: hint #29
; V83A: autiasp
; CHECK-NEXT: .cfi_negate_ra_state
; CHECK-NEXT: ret
entry:
%call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
Expand All @@ -30,7 +29,6 @@ define i32 @b() #0 {
; CHECK-NEXT: .cfi_negate_ra_state
; V8A: hint #29
; V83A: autiasp
; CHECK-NEXT: .cfi_negate_ra_state
; CHECK-NEXT: ret
entry:
%call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
Expand All @@ -46,7 +44,6 @@ define hidden i32 @c(ptr %fptr) #0 {
; CHECK-NEXT: .cfi_negate_ra_state
; V8A: hint #29
; V83A: autiasp
; CHECK-NEXT: .cfi_negate_ra_state
; CHECK-NEXT: ret
entry:
%call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4)
Expand All @@ -62,7 +59,6 @@ define hidden i32 @d(ptr %fptr) #0 {
; CHECK-NEXT: .cfi_negate_ra_state
; V8A: hint #29
; V83A: autiasp
; CHECK-NEXT: .cfi_negate_ra_state
; CHECK-NEXT: ret
entry:
%call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4)
Expand Down
212 changes: 204 additions & 8 deletions llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,47 @@
; RUN: llc -mtriple=aarch64 < %s | FileCheck --check-prefixes CHECK,CHECK-V8A %s
; RUN: llc -mtriple=aarch64 -mattr=v8.3a < %s | FileCheck --check-prefixes CHECK,CHECK-V83A %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64 < %s | FileCheck --check-prefixes CHECK-V8A %s
; RUN: llc -mtriple=aarch64 -mattr=v8.3a < %s | FileCheck --check-prefixes CHECK-V83A %s
; RUN: llc -mtriple=aarch64 -filetype=obj -o - <%s | llvm-dwarfdump -v - | FileCheck --check-prefix=CHECK-DUMP %s

@.str = private unnamed_addr constant [15 x i8] c"some exception\00", align 1
@_ZTIPKc = external dso_local constant ptr

; CHECK: @_Z3fooi
; CHECK-V8A: hint #25
; CHECK-V83A: paciasp
; CHECK-NEXT: .cfi_negate_ra_state
; CHECK-NOT: .cfi_negate_ra_state
define dso_local i32 @_Z3fooi(i32 %x) #0 {
; CHECK-V8A-LABEL: _Z3fooi:
; CHECK-V8A: // %bb.0: // %entry
; CHECK-V8A-NEXT: hint #25
; CHECK-V8A-NEXT: .cfi_negate_ra_state
; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
; CHECK-V8A-NEXT: .cfi_offset w30, -16
; CHECK-V8A-NEXT: str w0, [sp, #8]
; CHECK-V8A-NEXT: mov w0, #8 // =0x8
; CHECK-V8A-NEXT: bl __cxa_allocate_exception
; CHECK-V8A-NEXT: adrp x8, .L.str
; CHECK-V8A-NEXT: add x8, x8, :lo12:.L.str
; CHECK-V8A-NEXT: adrp x1, _ZTIPKc
; CHECK-V8A-NEXT: add x1, x1, :lo12:_ZTIPKc
; CHECK-V8A-NEXT: mov x2, xzr
; CHECK-V8A-NEXT: str x8, [x0]
; CHECK-V8A-NEXT: bl __cxa_throw
;
; CHECK-V83A-LABEL: _Z3fooi:
; CHECK-V83A: // %bb.0: // %entry
; CHECK-V83A-NEXT: paciasp
; CHECK-V83A-NEXT: .cfi_negate_ra_state
; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
; CHECK-V83A-NEXT: .cfi_offset w30, -16
; CHECK-V83A-NEXT: str w0, [sp, #8]
; CHECK-V83A-NEXT: mov w0, #8 // =0x8
; CHECK-V83A-NEXT: bl __cxa_allocate_exception
; CHECK-V83A-NEXT: adrp x8, .L.str
; CHECK-V83A-NEXT: add x8, x8, :lo12:.L.str
; CHECK-V83A-NEXT: adrp x1, _ZTIPKc
; CHECK-V83A-NEXT: add x1, x1, :lo12:_ZTIPKc
; CHECK-V83A-NEXT: mov x2, xzr
; CHECK-V83A-NEXT: str x8, [x0]
; CHECK-V83A-NEXT: bl __cxa_throw
entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
Expand All @@ -25,10 +56,175 @@ return: ; No predecessors!
ret i32 %0
}

; For asynchronous unwind tables, we need to flip the value of RA_SIGN_STATE
; before and after the tail call.
define hidden noundef i32 @baz_async(i32 noundef %a) #0 uwtable(async) {
; CHECK-V8A-LABEL: baz_async:
; CHECK-V8A: // %bb.0: // %entry
; CHECK-V8A-NEXT: hint #25
; CHECK-V8A-NEXT: .cfi_negate_ra_state
; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
; CHECK-V8A-NEXT: .cfi_offset w30, -16
; CHECK-V8A-NEXT: .cfi_remember_state
; CHECK-V8A-NEXT: cbz w0, .LBB1_2
; CHECK-V8A-NEXT: // %bb.1: // %if.then
; CHECK-V8A-NEXT: mov w0, wzr
; CHECK-V8A-NEXT: bl _Z3bari
; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0
; CHECK-V8A-NEXT: hint #29
; CHECK-V8A-NEXT: .cfi_negate_ra_state
; CHECK-V8A-NEXT: .cfi_restore w30
; CHECK-V8A-NEXT: b _Z3bari
; CHECK-V8A-NEXT: .LBB1_2: // %if.else
; CHECK-V8A-NEXT: .cfi_restore_state
; CHECK-V8A-NEXT: bl _Z4quuxi
; CHECK-V8A-NEXT: add w0, w0, #1
; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0
; CHECK-V8A-NEXT: hint #29
; CHECK-V8A-NEXT: .cfi_negate_ra_state
; CHECK-V8A-NEXT: .cfi_restore w30
; CHECK-V8A-NEXT: ret
;
; CHECK-V83A-LABEL: baz_async:
; CHECK-V83A: // %bb.0: // %entry
; CHECK-V83A-NEXT: paciasp
; CHECK-V83A-NEXT: .cfi_negate_ra_state
; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
; CHECK-V83A-NEXT: .cfi_offset w30, -16
; CHECK-V83A-NEXT: .cfi_remember_state
; CHECK-V83A-NEXT: cbz w0, .LBB1_2
; CHECK-V83A-NEXT: // %bb.1: // %if.then
; CHECK-V83A-NEXT: mov w0, wzr
; CHECK-V83A-NEXT: bl _Z3bari
; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 0
; CHECK-V83A-NEXT: autiasp
; CHECK-V83A-NEXT: .cfi_negate_ra_state
; CHECK-V83A-NEXT: .cfi_restore w30
; CHECK-V83A-NEXT: b _Z3bari
; CHECK-V83A-NEXT: .LBB1_2: // %if.else
; CHECK-V83A-NEXT: .cfi_restore_state
; CHECK-V83A-NEXT: bl _Z4quuxi
; CHECK-V83A-NEXT: add w0, w0, #1
; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 0
; CHECK-V83A-NEXT: .cfi_restore w30
; CHECK-V83A-NEXT: retaa
; Branch on %a == 0: a zero argument takes if.else, anything else takes
; if.then.
entry:
%tobool.not = icmp eq i32 %a, 0
br i1 %tobool.not, label %if.else, label %if.then

; Two calls to _Z3bari. Per the assertions above, the second one is emitted as
; a tail call (`b _Z3bari`), preceded by the return-address authentication and
; a .cfi_negate_ra_state directive.
if.then: ; preds = %entry
%call = tail call noundef i32 @_Z3bari(i32 noundef 0)
%call1 = tail call noundef i32 @_Z3bari(i32 noundef %call)
br label %return

; The call result is modified by the add, so this path ends in a normal return
; rather than a tail call. The assertions above expect .cfi_restore_state at the
; start of this block, paired with the .cfi_remember_state in the entry block.
if.else: ; preds = %entry
%call2 = tail call noundef i32 @_Z4quuxi(i32 noundef 0)
%add = add nsw i32 %call2, 1
br label %return

; Common exit: selects the value produced by whichever path was taken.
return: ; preds = %if.else, %if.then
%retval.0 = phi i32 [ %call1, %if.then ], [ %add, %if.else ]
ret i32 %retval.0
}

; For synchronous unwind tables, we don't need to update the unwind tables
; around the tail call. The tail-called function might throw an exception, but
; at this point we are set up to return into baz's caller, so the unwinder will
; never see baz's unwind table for that exception.
define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
; CHECK-V8A-LABEL: baz_sync:
; CHECK-V8A: // %bb.0: // %entry
; CHECK-V8A-NEXT: hint #25
; CHECK-V8A-NEXT: .cfi_negate_ra_state
; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
; CHECK-V8A-NEXT: .cfi_offset w30, -16
; CHECK-V8A-NEXT: cbz w0, .LBB2_2
; CHECK-V8A-NEXT: // %bb.1: // %if.then
; CHECK-V8A-NEXT: mov w0, wzr
; CHECK-V8A-NEXT: bl _Z3bari
; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V8A-NEXT: hint #29
; CHECK-V8A-NEXT: b _Z3bari
; CHECK-V8A-NEXT: .LBB2_2: // %if.else
; CHECK-V8A-NEXT: bl _Z4quuxi
; CHECK-V8A-NEXT: add w0, w0, #1
; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V8A-NEXT: hint #29
; CHECK-V8A-NEXT: ret
;
; CHECK-V83A-LABEL: baz_sync:
; CHECK-V83A: // %bb.0: // %entry
; CHECK-V83A-NEXT: paciasp
; CHECK-V83A-NEXT: .cfi_negate_ra_state
; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
; CHECK-V83A-NEXT: .cfi_offset w30, -16
; CHECK-V83A-NEXT: cbz w0, .LBB2_2
; CHECK-V83A-NEXT: // %bb.1: // %if.then
; CHECK-V83A-NEXT: mov w0, wzr
; CHECK-V83A-NEXT: bl _Z3bari
; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V83A-NEXT: autiasp
; CHECK-V83A-NEXT: b _Z3bari
; CHECK-V83A-NEXT: .LBB2_2: // %if.else
; CHECK-V83A-NEXT: bl _Z4quuxi
; CHECK-V83A-NEXT: add w0, w0, #1
; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-V83A-NEXT: retaa
; Branch on %a == 0: a zero argument takes if.else, anything else takes
; if.then.
entry:
%tobool.not = icmp eq i32 %a, 0
br i1 %tobool.not, label %if.else, label %if.then

; Two calls to _Z3bari. Per the assertions above, the second one is emitted as
; a tail call (`b _Z3bari`) after the return-address authentication, and no CFI
; directive is expected between the authentication and the branch.
if.then: ; preds = %entry
%call = tail call noundef i32 @_Z3bari(i32 noundef 0)
%call1 = tail call noundef i32 @_Z3bari(i32 noundef %call)
br label %return

; The call result is modified by the add, so this path ends in a normal return
; rather than a tail call. The assertions above expect no CFI directives in
; this block.
if.else: ; preds = %entry
%call2 = tail call noundef i32 @_Z4quuxi(i32 noundef 0)
%add = add nsw i32 %call2, 1
br label %return

; Common exit: selects the value produced by whichever path was taken.
return: ; preds = %if.else, %if.then
%retval.0 = phi i32 [ %call1, %if.then ], [ %add, %if.else ]
ret i32 %retval.0
}

declare dso_local ptr @__cxa_allocate_exception(i64)

declare dso_local void @__cxa_throw(ptr, ptr, ptr)

declare dso_local noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr
declare dso_local noundef i32 @_Z4quuxi(i32 noundef) local_unnamed_addr

attributes #0 = { "sign-return-address"="all" }

;CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state
; foo
; CHECK-DUMP-LABEL: FDE
; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state:
; CHECK-DUMP-NOT: DW_CFA_AARCH64_negate_ra_state
; CHECK-DUMP-NOT: DW_CFA_remember_state
; CHECK-DUMP-NOT: DW_CFA_restore_state

; baz_async
; CHECK-DUMP-LABEL: FDE
; CHECK-DUMP: Format: DWARF32
; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state:
; CHECK-DUMP: DW_CFA_remember_state:
; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state:
; CHECK-DUMP: DW_CFA_restore_state:
; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state:

; baz_sync
; CHECK-DUMP-LABEL: FDE
; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state:
; CHECK-DUMP-NOT: DW_CFA_AARCH64_negate_ra_state
; CHECK-DUMP-NOT: DW_CFA_remember_state
; CHECK-DUMP-NOT: DW_CFA_restore_state
11 changes: 0 additions & 11 deletions llvm/test/CodeGen/AArch64/sign-return-address.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ define i32 @leaf_sign_all(i32 %x) "sign-return-address"="all" {
; COMPAT-NEXT: hint #25
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: leaf_sign_all:
Expand All @@ -53,7 +52,6 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" {
; COMPAT-NEXT: //NO_APP
; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: leaf_clobbers_lr:
Expand Down Expand Up @@ -85,7 +83,6 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" {
; COMPAT-NEXT: bl foo
; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: non_leaf_sign_all:
Expand Down Expand Up @@ -113,7 +110,6 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" {
; COMPAT-NEXT: bl foo
; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: non_leaf_sign_non_leaf:
Expand Down Expand Up @@ -144,7 +140,6 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac
; CHECK-NEXT: bl foo
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: autiasp
; CHECK-NEXT: .cfi_negate_ra_state
; CHECK-NEXT: ldr x30, [x18, #-8]!
; CHECK-NEXT: ret
%call = call i32 @foo(i32 %x)
Expand Down Expand Up @@ -175,7 +170,6 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" {
; COMPAT-NEXT: //NO_APP
; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: b bar
;
; V83A-LABEL: spill_lr_and_tail_call:
Expand All @@ -190,7 +184,6 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" {
; V83A-NEXT: //NO_APP
; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; V83A-NEXT: autiasp
; V83A-NEXT: .cfi_negate_ra_state
; V83A-NEXT: b bar
call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1
tail call fastcc i64 @bar(i64 %x)
Expand All @@ -203,7 +196,6 @@ define i32 @leaf_sign_all_a_key(i32 %x) "sign-return-address"="all" "sign-return
; COMPAT-NEXT: hint #25
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: leaf_sign_all_a_key:
Expand All @@ -221,7 +213,6 @@ define i32 @leaf_sign_all_b_key(i32 %x) "sign-return-address"="all" "sign-return
; COMPAT-NEXT: hint #27
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: hint #31
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: leaf_sign_all_b_key:
Expand Down Expand Up @@ -250,7 +241,6 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "sign-return-address"="all" "sign-re
; COMPAT-NEXT: hint #25
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: hint #29
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: leaf_sign_all_a_key_bti:
Expand All @@ -269,7 +259,6 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "sign-return-address"="all" "sign-re
; COMPAT-NEXT: hint #27
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: hint #31
; COMPAT-NEXT: .cfi_negate_ra_state
; COMPAT-NEXT: ret
;
; V83A-LABEL: leaf_sign_all_b_key_bti:
Expand Down

0 comments on commit f2e7285

Please sign in to comment.