Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SEH for Windows aarch64: invalid Packed unwind data detection with H=1 #54879

Open
egelke opened this issue Apr 12, 2022 · 9 comments
Open

SEH for Windows aarch64: invalid Packed unwind data detection with H=1 #54879

egelke opened this issue Apr 12, 2022 · 9 comments

Comments

@egelke
Copy link

egelke commented Apr 12, 2022

There is an issue with the detection of "packed unwind data" when compiling aarch64 asm with SEH directives on Windows, e.g.:

clang -g  -c -target aarch64-pc-windows-msvc -o obj\main.obj main.asm
link /OUT:bin\program.exe "kernel32.lib" /DEBUG /MACHINE:ARM64 /SUBSYSTEM:CONSOLE /NOLOGO /NODEFAULTLIB /ENTRY:"_start" obj\main.obj

Everything works fine as long as you don't home the integer parameter registers. The example below results in correctly packed unwind data.

     .global func
func:
    .seh_proc func
    stp x19, x20,[sp,#-0x10]!
    .seh_save_regp_x x19, 0x10
    stp fp, lr, [sp, #-0x10]!
    .seh_save_fplr_x 0x10
    mov fp, sp
    .seh_set_fp
    .seh_endprologue

    //return success
    mov w0, wzr

    .seh_startepilogue
    ldp fp, lr, [sp], #0x10
    .seh_save_fplr_x 0x10
    ldp x19, x20, [sp], #0x10
    .seh_save_regp_x x19, 0x10
    .seh_endepilogue

    ret
    .seh_endfunclet
    .seh_endproc
c:\> dumpbin /UNWINDINFO bin\program.exe
           Begin    Packed  Function Name

  00000010 00001038    Y     func
   Start=40001038  Flag=1  FuncLen=1C  RegF=0  RegI=2  H=0  CR=3  FrameSize=0x20
      [RawPdata=00001038 0162001D]
      +0000 stp  x19,x20,[sp,#-0x10]!; Actual=stp         x19,x20,[sp,#-0x10]!
      +0004 stp  fp,lr,[sp,#-0x10]!  ; Actual=stp         fp,lr,[sp,#-0x10]!
      +0008 mov  fp,sp               ; Actual=mov         fp,sp
   Epilog #1 unwind:  (Offset=10)
      +0010 ldp  fp,lr,[sp],#0x10    ; Actual=ldp         fp,lr,[sp],#0x10
      +0014 ldp  x19,x20,[sp],#0x10  ; Actual=ldp         x19,x20,[sp],#0x10
      +0018 ret                      ; Actual=ret

However, when you home the integer parameter registers, i.e. try to generate packed unwind data with H=1, it fails to recognize that it's a packed format and generates an unpacked version instead:

     .global func
func:
    .seh_proc func
    stp x19, x20,[sp,#-0x50]!
    .seh_save_regp_x x19, 0x50
    stp x0, x1, [sp, #0x10]
    .seh_nop
    stp x2, x3, [sp, #0x20]
    .seh_nop
    stp x4, x5, [sp, #0x30]
    .seh_nop
    stp x6, x7, [sp, #0x40]
    .seh_nop
    stp fp, lr, [sp, #-0x10]!
    .seh_save_fplr_x 0x10
    mov fp, sp
    .seh_set_fp
    .seh_endprologue

    //return success
    mov w0, wzr

    .seh_startepilogue
    ldp fp, lr, [sp], #0x10
    .seh_save_fplr_x 0x10
    ldp x19, x20, [sp], #0x50
    .seh_save_regp_x x19, 0x50
    .seh_endepilogue

    ret
    .seh_endfunclet
    .seh_endproc
c:\> dumpbin /UNWINDINFO bin\program.exe
           Begin    Packed  Function Name

  00000010 00001038    N     func
   Start=40001038  Xdata=400032E8
      [RawPdata=00001038 000032E8]
   FuncLen=2C  Vers=0  X=0  E=0  Epilogs=1  CodeWords=3
   Prolog unwind:
      06: 2A...... +0000 stp   x19,x20,[sp,#-0x50]!; Actual=stp         x19,x20,[sp,#-0x50]!
      05: E3...... +0004 nop                      ; Actual=stp         x0,x1,[sp,#0x10]
      04: E3...... +0008 nop                      ; Actual=stp         x2,x3,[sp,#0x20]
      03: E3...... +000C nop                      ; Actual=stp         x4,x5,[sp,#0x30]
      02: E3...... +0010 nop                      ; Actual=stp         x6,x7,[sp,#0x40]
      01: 81...... +0014 stp   fp,lr,[sp,#-0x10]! ; Actual=stp         fp,lr,[sp,#-0x10]!
      00: E1...... +0018 mov   fp,sp              ; Actual=mov         fp,sp
                   +001C (end sequence)
   Epilog #1 unwind:  (Offset=10  Index=8)
      08: 81...... +0020 ldp   fp,lr,[sp],#0x10   ; Actual=ldp         fp,lr,[sp],#0x10
      09: 2A...... +0024 ldp   x19,x20,[sp],#0x50 ; Actual=ldp         x19,x20,[sp],#0x50
      0A: E4...... +0028 end                      ; Actual=ret
                   +0030 (end sequence)
   [RawXdata=1840000B 02000008 E3E381E1 E42AE3E3 E3E42A81]

If, however, you repeat the "nop" SEH directives, together with their instructions, in the epilogue unwind code, then you get incorrect packed unwind data, as shown by the following example:

     .global func
func:
    .seh_proc func
    stp x19, x20,[sp,#-0x50]!
    .seh_save_regp_x x19, 0x50
    stp x0, x1, [sp, #0x10]
    .seh_nop
    stp x2, x3, [sp, #0x20]
    .seh_nop
    stp x4, x5, [sp, #0x30]
    .seh_nop
    stp x6, x7, [sp, #0x40]
    .seh_nop
    stp fp, lr, [sp, #-0x10]!
    .seh_save_fplr_x 0x10
    mov fp, sp
    .seh_set_fp
    .seh_endprologue

    //return success
    mov w0, wzr

    .seh_startepilogue
    ldp fp, lr, [sp], #0x10
    .seh_save_fplr_x 0x10
    ldp x6, x7, [sp, 0x40]
    .seh_nop
    ldp x4, x5, [sp, 0x30]
    .seh_nop
    ldp x2, x3, [sp, 0x20]
    .seh_nop
    ldp x0, x1, [sp, 0x10]
    .seh_nop
    ldp x19, x20, [sp], #0x50
    .seh_save_regp_x x19, 0x50
    .seh_endepilogue

    ret
    .seh_endfunclet
    .seh_endproc
  00000010 00001038    Y     func
   Start=40001038  Flag=1  FuncLen=3C  RegF=0  RegI=2  H=1  CR=3  FrameSize=0x60
      [RawPdata=00001038 0372003D]
      +0000 stp  x19,x20,[sp,#-0x50]!; Actual=stp         x19,x20,[sp,#-0x50]!
      +0004 stp  x0,x1,[sp,#0x10]    ; Actual=stp         x0,x1,[sp,#0x10]
      +0008 stp  x2,x3,[sp,#0x20]    ; Actual=stp         x2,x3,[sp,#0x20]
**** Expected opcode A90207E0
      +000C stp  x4,x5,[sp,#0x30]    ; Actual=stp         x4,x5,[sp,#0x30]
**** Expected opcode A90307E0
      +0010 stp  x6,x7,[sp,#0x40]    ; Actual=stp         x6,x7,[sp,#0x40]
**** Expected opcode A90407E0
      +0014 stp  fp,lr,[sp,#-0x10]!  ; Actual=stp         fp,lr,[sp,#-0x10]!
      +0018 mov  fp,sp               ; Actual=mov         fp,sp
   Epilog #1 unwind:  (Offset=30)
      +0030 ldp  fp,lr,[sp],#0x10    ; Actual=ldp         x0,x1,[sp,#0x10]
**** Expected opcode A8C17BFD
      +0034 ldp  x19,x20,[sp],#0x50  ; Actual=ldp         x19,x20,[sp],#0x50
      +0038 ret                      ; Actual=ret

(Please ignore the bug in MS dumpbin.exe: the opcode is correct; it's just the check that is wrong, since it expects x0 and x1 instead of x2 and x3.)

Obviously, a NOP should not be repeated in the unwind code. Instead, when clang encounters a .seh_nop directive in the prologue, it should not expect any matching instruction in the epilogue in order to generate packed unwind data.

@llvmbot
Copy link
Collaborator

llvmbot commented Apr 12, 2022

@llvm/issue-subscribers-backend-aarch64

@zhaoshiz
Copy link
Contributor

Hello, @egelke

I'm not familiar with Win64EH and unwinding. Can you confirm whether the output below is correct packed unwind data for your test case 2 (homing int params but no mirrored nop in epilog)?


           Begin    Packed  Function Name

  00000000 00001000    Y     func
   Start=40001000  Flag=1  FuncLen=2C  RegF=0  RegI=2  H=1  CR=3  FrameSize=0x60
      [RawPdata=00001000 0372002D]
      +0000 stp  x19,x20,[sp,#-0x50]!; Actual=stp         x19,x20,[sp,#-0x50]!
      +0004 stp  x0,x1,[sp,#0x10]    ; Actual=stp         x0,x1,[sp,#0x10]
      +0008 stp  x2,x3,[sp,#0x20]    ; Actual=stp         x2,x3,[sp,#0x20]
**** Expected opcode A90207E0
      +000C stp  x4,x5,[sp,#0x30]    ; Actual=stp         x4,x5,[sp,#0x30]
**** Expected opcode A90307E0
      +0010 stp  x6,x7,[sp,#0x40]    ; Actual=stp         x6,x7,[sp,#0x40]
**** Expected opcode A90407E0
      +0014 stp  fp,lr,[sp,#-0x10]!  ; Actual=stp         fp,lr,[sp,#-0x10]!
      +0018 mov  fp,sp               ; Actual=mov         fp,sp
   Epilog #1 unwind:  (Offset=20)
      +0020 ldp  fp,lr,[sp],#0x10    ; Actual=ldp         fp,lr,[sp],#0x10
      +0024 ldp  x19,x20,[sp],#0x50  ; Actual=ldp         x19,x20,[sp],#0x50
      +0028 ret                      ; Actual=ret

@egelke
Copy link
Author

egelke commented Apr 28, 2022

To be honest I'm still quite a novice, but yes, that seems to be right (the assembly, not the expected opcodes).

@zhaoshiz
Copy link
Contributor

I pushed a fix for this: https://reviews.llvm.org/D125433

@mstorsjo
Copy link
Member

While I do agree that it wouldn't make sense to have the corresponding nops in the epilogue, the Windows unwinder actually does seem to behave as if it expects the epilogue to have them. That's why I implemented it this way originally. (I hadn't noticed dumpbin's /unwindinfo option, so I relied mostly on docs + empirical observation of the behaviours.)

So here, the docs would imply that the epilogue shouldn't have nops:

https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#packed-unwind-data

The epilogs for canonical functions follow a similar form, except H has no effect, the set_fp instruction is omitted, and the order of steps and the instructions in each step are reversed in the epilog.

And the dumpbin output also seems to agree with this.

However, if you create a function with packed unwind info with the H=1 flag set, and test unwinding on it with RtlVirtualUnwind, at offsets in the function around the epilogue, you will see that the unwinder does indeed believe that the epilogue does contain similar mirrored nop instructions. I made such a testcase; have a look at https://gist.github.com/mstorsjo/fb34547843ae8f65935ca532366030c2. There, I test unwinding on an instruction sequence that looks like this:

    0xf3, 0x0f, 0x1b, 0xf8,   /* 00: str x19, [sp, #-80]! */
    0xe0, 0x07, 0x01, 0xa9,   /* 04: stp x0, x1, [sp, #16] */
    0xe2, 0x0f, 0x02, 0xa9,   /* 08: stp x2, x3, [sp, #32] */
    0xe4, 0x17, 0x03, 0xa9,   /* 0c: stp x4, x5, [sp, #48] */
    0xe6, 0x1f, 0x04, 0xa9,   /* 10: stp x6, x7, [sp, #64] */
    0xff, 0x83, 0x00, 0xd1,   /* 14: sub sp, sp, #32 */
    0x1f, 0x20, 0x03, 0xd5,   /* 18: nop */
    0x1f, 0x20, 0x03, 0xd5,   /* 1c: nop */
    0x1f, 0x20, 0x03, 0xd5,   /* 20: nop */
    0xff, 0x83, 0x00, 0x91,   /* 24: add sp, sp, #32 */
    0x1f, 0x20, 0x03, 0xd5,   /* 28: nop */
    0x1f, 0x20, 0x03, 0xd5,   /* 2c: nop */
    0x1f, 0x20, 0x03, 0xd5,   /* 30: nop */
    0x1f, 0x20, 0x03, 0xd5,   /* 34: nop */
    0xf3, 0x07, 0x45, 0xf8,   /* 38: ldr x19, [sp], #80 */
    0xc0, 0x03, 0x5f, 0xd6,   /* 3c: ret */

Dumpbin for this function thinks that the epilogue should look like this:

   Epilog #1 unwind:  (Offset=34)
      +0034 add  sp,sp,#20           ; Actual=nop
      +0038 ldr  x19,[sp],#0x50      ; Actual=ldr         x19,[sp],#0x50 
      +003C ret                      ; Actual=ret

Unwinding from different offsets adjusts the Sp register like this:

func offset 0x20: Sp adjusted 0x70 bytes
func offset 0x24: Sp adjusted 0x70 bytes
func offset 0x28: Sp adjusted 0x50 bytes
func offset 0x2c: Sp adjusted 0x50 bytes
func offset 0x30: Sp adjusted 0x50 bytes
func offset 0x34: Sp adjusted 0x50 bytes
func offset 0x38: Sp adjusted 0x50 bytes
func offset 0x3c: Sp adjusted 0x0 bytes

According to dumpbin, the epilogue should start at offset 0x34 - thus unwinding from offset 0x30 should definitely increment Sp by the full amount 0x70. But in practice, Sp is increased by 0x70 only if unwinding from 0x24 or earlier. So this does make it look like the implementation of RtlVirtualUnwind actually expects a matching, exactly mirrored epilogue, even if this doesn't make much sense practically, and while this deviates from the docs.

I'm not sure if MSVC actually ever emits packed unwind info with the H=1 flag set. I tried to trigger it, with a test snippet like this:

#include <stdarg.h>

void other(va_list ap, void* ptr);

void func(int a, ...) {
    va_list ap;
    va_start(ap, a);
    char buf[32];
    other(ap, buf);
    va_end(ap);
}

Compiling this with MSVC (I tested 2017, 2019 and 2022) I don't get packed unwind info with any of them:

$ cl -c arm64-seh-varargs.c -GS- 
Microsoft (R) C/C++ Optimizing Compiler Version 19.30.30705 for ARM64
Copyright (C) Microsoft Corporation.  All rights reserved.

arm64-seh-varargs.c
$ llvm-readobj --unwind arm64-seh-varargs.obj 

File: arm64-seh-varargs.obj
Format: COFF-ARM64
Arch: aarch64
AddressSize: 64bit
UnwindInformation [
  RuntimeFunction {
    Function: func (0x0)
    ExceptionRecord: $unwind$func (0x0)
    ExceptionData {
      FunctionLength: 72
      Version: 0
      ExceptionData: No
      EpiloguePacked: Yes
      EpilogueOffset: 8
      ByteCodeLength: 12
      Prologue [
        0xe1                ; mov fp, sp
        0x87                ; stp x29, x30, [sp, #-64]!
        0xe3                ; nop
        0xe3                ; nop
        0xe3                ; nop
        0xe3                ; nop
        0x04                ; sub sp, #64
        0xe4                ; end
      ]
      Epilogue [
        0x87                ; ldp x29, x30, [sp], #64
        0x04                ; add sp, #64
        0xe4                ; end
      ]
    }
  }
]

So it would seem to me that MSVC actually doesn't ever generate packed unwind info with H=1 set, and therefore, the Windows unwinder inconsistency hasn't ever been noticed.

So in this case, if we change LLVM's code generation to match the docs and dumpbin, we'd actually get incorrect unwinding results in practice, near the start of the epilogue. Therefore, I guess the only practical thing we can do in LLVM would be to stop generating packed unwind info with H=1 altogether (which I presume we haven't ever done so far, outside of handwritten assembly in testcases). Even if the inconsistency gets fixed in newer versions of Windows, we can't start using it right away anyway.

(I noticed a similar issue when initially working on optimizing the ARM64 unwind info; .seh_save_next for float register pairs didn't work as expected at the time. This actually had been noticed within Microsoft at the same time, and does work as documented in Windows 11, but not before that. In that case, it was also the case that MSVC hadn't ever generated such unwind info, so there was no real world breakage. See https://github.com/llvm/llvm-project/blob/llvmorg-14.0.0/llvm/lib/MC/MCWin64EH.cpp#L573-L575 for our code where we decide not to do a potential simplification, to avoid this bug.)

@efriedma-quic
Copy link
Collaborator

And the dumpbin output also seems to agree with this.

I wouldn't be shocked if dumpbin is wrong. See also #54900 .

you will see that the unwinder does indeed believe that the epilogue does contain similar mirrored nop instructions

Umm, what? Really? That seems like a pretty big oversight if the "H" bit is simply broken.

Therefore, I guess the only practical thing we can do in LLVM would be to stop generating packed unwind info with H=1 altogether

That's kind of ugly. But if that's how the unwinder actually behaves, I guess H=1 is completely useless in practice.

which I presume we haven't ever done so far, outside of handwritten assembly in testcases

We don't have any code to insert nops in the epilogue, no. :)


If we know the Microsoft documentation is wrong, can we try to fix it?

@mstorsjo
Copy link
Member

you will see that the unwinder does indeed believe that the epilogue does contain similar mirrored nop instructions

Umm, what? Really? That seems like a pretty big oversight if the "H" bit is simply broken.

Indeed - but I guess it can happen if you don't have thorough unit tests that test unwinding from every single instruction location in prologues/epilogues, if the compiler doesn't happen to ever emit it.

If we know the Microsoft documentation is wrong, can we try to fix it?

I guess that could be done...

I have gotten fixes for some other cases merged there before, but those were bugs in the documentation (while the implementation was sensible and correct). And for other bugs that are found, like e.g. .seh_save_next for float register pairs, which was broken in Windows 10 but is fixed in 11 - would they accept a patch that documents this (to let other implementers know to avoid it if they care about older versions)? Anyway, I guess I can try to make a patch to the docs there to bring the issue up for discussion.

@zhaoshiz
Copy link
Contributor

So there's an inconsistency between the MS documentation and the Windows unwinder (RtlVirtualUnwind) behavior. I'll hold off on the patch for now.

@mstorsjo
Copy link
Member

I posted https://reviews.llvm.org/D125876 to make us stop producing packed unwind info, for this inconsistent state. Our current code does work correctly on current Windows unwinders, but if they fix their implementation to match the docs, it would break in the future. So let's stop producing such code. The current code doesn't ever match for compiler generated code anyway, only for handcrafted cases.

mstorsjo added a commit that referenced this issue May 18, 2022
…meters

There's an inconsistency regarding the epilogs of packed ARM64
unwind info with homed parameters; according to the documentation
(and according to common sense), the epilog wouldn't have a series
of nop instructions matching the stp x0-x7 in the prolog - however
in practice, RtlVirtualUnwind still seems to behave as if the epilog
does have the mirrored nops from the prolog.

In practice, MSVC doesn't seem to produce packed unwind info with
homed parameters, which might be why this inconsistency hasn't
been noticed.

Thus, to play it safe, avoid creating such packed unwind info with
homed parameters. (LLVM's current behaviour matches the current
runtime behaviour of RtlVirtualUnwind, but if it later is bug fixed
to match the documentation, such unwind information would be
incorrect.)

See #54879 for further
discussion on the matter.

Differential Revision: https://reviews.llvm.org/D125876
mem-frob pushed a commit to draperlaboratory/hope-llvm-project that referenced this issue Oct 7, 2022
…meters

There's an inconsistency regarding the epilogs of packed ARM64
unwind info with homed parameters; according to the documentation
(and according to common sense), the epilog wouldn't have a series
of nop instructions matching the stp x0-x7 in the prolog - however
in practice, RtlVirtualUnwind still seems to behave as if the epilog
does have the mirrored nops from the prolog.

In practice, MSVC doesn't seem to produce packed unwind info with
homed parameters, which might be why this inconsistency hasn't
been noticed.

Thus, to play it safe, avoid creating such packed unwind info with
homed parameters. (LLVM's current behaviour matches the current
runtime behaviour of RtlVirtualUnwind, but if it later is bug fixed
to match the documentation, such unwind information would be
incorrect.)

See llvm/llvm-project#54879 for further
discussion on the matter.

Differential Revision: https://reviews.llvm.org/D125876
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

6 participants