diff --git a/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp b/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp index e9081a4ae4e72..7640d7090949c 100644 --- a/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp +++ b/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp @@ -201,15 +201,11 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) { "The epilog is deallocating a stack " "allocation, but the prolog did " "not allocate one"); - if (HasStackDealloc) + if (PoppedRegCount > 0) return rejectCurrentFunctionInternalError( MF, Mode, - "The epilog is deallocating the stack " - "allocation more than once"); - if (PoppedRegCount > 0) - llvm_unreachable( - "Should have raised an error: either popping before " - "deallocating or deallocating without an allocation"); + "The epilog is deallocating a stack allocation after popping " + "registers"); HasStackDealloc = true; } else if (State == FunctionState::FinishedEpilog) @@ -219,33 +215,41 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) { case X86::POP64r: if (State == FunctionState::InEpilog) { - // After the stack pointer has been adjusted, the epilog must - // POP each register in reverse order of the PUSHes in the prolog. - PoppedRegCount++; - if (HasStackAlloc != HasStackDealloc) - return rejectCurrentFunctionInternalError( - MF, Mode, - "Cannot pop registers before the stack " - "allocation has been deallocated"); - if (PoppedRegCount > PushedRegs.size()) - return rejectCurrentFunctionInternalError( - MF, Mode, - "The epilog is popping more registers than the prolog pushed"); - if (PushedRegs[PushedRegs.size() - PoppedRegCount] != - MI.getOperand(0).getReg()) - return rejectCurrentFunctionInternalError( - MF, Mode, - "The epilog is popping a registers in " - "a different order than the " - "prolog pushed them"); - - // Unwind v2 records the size of the epilog not from where we place - // SEH_BeginEpilogue (as that contains the instruction to adjust the - // stack pointer) but from the first POP instruction (if there is - // one). - if (!UnwindV2StartLocation) { - assert(PoppedRegCount == 1); - UnwindV2StartLocation = &MI; + Register Reg = MI.getOperand(0).getReg(); + if (HasStackAlloc && (PoppedRegCount == 0) && + !llvm::is_contained(PushedRegs, Reg)) { + // If this is a pop that doesn't correspond to the set of pushed + // registers, then assume it was used to adjust the stack pointer. + HasStackDealloc = true; + } else { + // After the stack pointer has been adjusted, the epilog must + // POP each register in reverse order of the PUSHes in the prolog. + PoppedRegCount++; + if (HasStackAlloc != HasStackDealloc) + return rejectCurrentFunctionInternalError( + MF, Mode, + "Cannot pop registers before the stack " + "allocation has been deallocated"); + if (PoppedRegCount > PushedRegs.size()) + return rejectCurrentFunctionInternalError( + MF, Mode, + "The epilog is popping more registers than the prolog " + "pushed"); + if (PushedRegs[PushedRegs.size() - PoppedRegCount] != Reg) + return rejectCurrentFunctionInternalError( + MF, Mode, + "The epilog is popping a registers in " + "a different order than the " + "prolog pushed them"); + + // Unwind v2 records the size of the epilog not from where we place + // SEH_BeginEpilogue (as that contains the instruction to adjust the + // stack pointer) but from the first POP instruction (if there is + // one). + if (!UnwindV2StartLocation) { + assert(PoppedRegCount == 1); + UnwindV2StartLocation = &MI; + } } } else if (State == FunctionState::FinishedEpilog) // Unexpected instruction after the epilog. diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir index f099d4fddcb33..ed97e52f2d5c5 100644 --- a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir +++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir @@ -97,38 +97,6 @@ body: | RET64 ... -;--- double_dealloc.mir -# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - %t/double_dealloc.mir \ -# RUN: -run-pass=x86-wineh-unwindv2 2>&1 | FileCheck %s \ -# RUN: --check-prefix=DOUBLE-DEALLOC -# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/double_dealloc.mir \ -# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \ -# RUN: FileCheck %s --check-prefix=BESTEFFORT -# DOUBLE-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'double_dealloc': -# DOUBLE-DEALLOC-SAME: The epilog is deallocating the stack allocation more than once - ---- | - define dso_local void @double_dealloc() local_unnamed_addr { - entry: - ret void - } - !llvm.module.flags = !{!0} - !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} -... ---- -name: double_dealloc -body: | - bb.0.entry: - $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags - frame-setup SEH_StackAlloc 40 - frame-setup SEH_EndPrologue - SEH_BeginEpilogue - $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags - $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags - SEH_EndEpilogue - RET64 -... - ;--- dealloc_after_epilog.mir # RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \ # RUN: %t/dealloc_after_epilog.mir -run-pass=x86-wineh-unwindv2 2>&1 | \ @@ -316,3 +284,38 @@ body: | $ecx = MOV32rr killed $eax RET64 ... + +;--- dealloc_pop_dealloc.mir +# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \ +# RUN: %t/dealloc_pop_dealloc.mir -run-pass=x86-wineh-unwindv2 2>&1 | \ +# RUN: FileCheck %s --check-prefix=DEALLOC-POP-DEALLOC +# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/dealloc_pop_dealloc.mir \ +# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \ +# RUN: FileCheck %s --check-prefix=BESTEFFORT +# DEALLOC-POP-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'dealloc_pop_dealloc': +# DEALLOC-POP-DEALLOC-SAME: The epilog is deallocating a stack allocation after popping registers + +--- | + define dso_local void @dealloc_pop_dealloc() local_unnamed_addr { + entry: + ret void + } + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} +... +--- +name: dealloc_pop_dealloc +body: | + bb.0.entry: + frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 55 + $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags + frame-setup SEH_StackAlloc 40 + frame-setup SEH_EndPrologue + SEH_BeginEpilogue + $rsp = frame-destroy ADD64ri32 $rsp, 20, implicit-def dead $eflags + $rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $rsp = frame-destroy ADD64ri32 $rsp, 20, implicit-def dead $eflags + SEH_EndEpilogue + RET64 +... diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir b/llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir new file mode 100644 index 0000000000000..09a839f00814a --- /dev/null +++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir @@ -0,0 +1,199 @@ +# RUN: llc -o - %s -mtriple=x86_64-unknown-windows-msvc \ +# RUN: -run-pass=x86-wineh-unwindv2 | FileCheck %s + +# Regression test for Win x64 unwind v2: in some cases it is better to use +# push+pop to adjust the stack, rather than sub+add. This is permitted with +# unwind v2 as the requirement is that the epilog finishes adjusting the stack +# before popping the registers listed in the unwind table. + +# Pushes and pops the same register. +# CHECK-LABEL: name: push_pop_same +# CHECK: body: +# CHECK-NEXT: bb.0 +# CHECK-NEXT: SEH_UnwindVersion 2 +# CHECK-NEXT: frame-setup PUSH64r undef $rax +# CHECK-NEXT: frame-setup SEH_StackAlloc 8 +# CHECK-NEXT: frame-setup SEH_EndPrologue +# CHECK-NEXT: SEH_BeginEpilogue +# CHECK-NEXT: $rax = frame-destroy +# CHECK-NEXT: SEH_UnwindV2Start +# CHECK-NEXT: SEH_EndEpilogue +# CHECK-NEXT: RET64 + +# Pushes and pops a different register. +# CHECK-LABEL: name: push_pop_different +# CHECK: body: +# CHECK-NEXT: bb.0 +# CHECK-NEXT: SEH_UnwindVersion 2 +# CHECK-NEXT: frame-setup PUSH64r undef $rax +# CHECK-NEXT: frame-setup SEH_StackAlloc 8 +# CHECK-NEXT: frame-setup SEH_EndPrologue +# CHECK: SEH_BeginEpilogue +# CHECK-NEXT: $rcx = frame-destroy POP64r +# CHECK-NEXT: SEH_UnwindV2Start +# CHECK-NEXT: SEH_EndEpilogue +# CHECK-NEXT: RET64 $eax + +# Pushes in the prolog, adds in the epilog. +# CHECK-LABEL: name: push_add +# CHECK: body: +# CHECK-NEXT: bb.0 +# CHECK-NEXT: SEH_UnwindVersion 2 +# CHECK-NEXT: frame-setup PUSH64r killed $r15 +# CHECK-NEXT: frame-setup SEH_PushReg 126 +# CHECK-NEXT: frame-setup PUSH64r killed $r14 +# CHECK-NEXT: frame-setup SEH_PushReg 125 +# CHECK-NEXT: frame-setup PUSH64r killed $rsi +# CHECK-NEXT: frame-setup SEH_PushReg 60 +# CHECK-NEXT: frame-setup PUSH64r killed $rdi +# CHECK-NEXT: frame-setup SEH_PushReg 55 +# CHECK-NEXT: frame-setup PUSH64r killed $rbx +# CHECK-NEXT: frame-setup SEH_PushReg 53 +# CHECK-NEXT: frame-setup PUSH64r undef $rax +# CHECK-NEXT: frame-setup SEH_StackAlloc 8 +# CHECK-NEXT: frame-setup SEH_EndPrologue +# CHECK: SEH_BeginEpilogue +# CHECK-NEXT: $rsp = frame-destroy ADD64ri32 $rsp, 8 +# CHECK-NEXT: SEH_UnwindV2Start +# CHECK-NEXT: $rbx = frame-destroy POP64r +# CHECK-NEXT: $rdi = frame-destroy POP64r +# CHECK-NEXT: $rsi = frame-destroy POP64r +# CHECK-NEXT: $r14 = frame-destroy POP64r +# CHECK-NEXT: $r15 = frame-destroy POP64r +# CHECK-NEXT: SEH_EndEpilogue +# CHECK-NEXT: RET64 + +--- | + define void @push_pop_same() { + %small_alloca = alloca i32, align 4 + ret void + } + + define i32 @push_pop_different(i32 %x, i32 %y) { + %small_alloca = alloca i32, align 4 + %sum = add i32 %x, %y + ret i32 %sum + } + + define void @push_add(ptr %a, ptr %b, ptr %out) { + %small_alloca = alloca i32, align 4 + %av = load i256, ptr %a, align 16 + %bv = load i256, ptr %b, align 16 + %r = mul i256 %av, %bv + store i256 %r, ptr %out, align 16 + ret void + } + + !llvm.module.flags = !{!0} + + !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2} +... +--- +name: push_pop_same +body: | + bb.0 (%ir-block.0): + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + frame-setup SEH_StackAlloc 8 + frame-setup SEH_EndPrologue + SEH_BeginEpilogue + $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp + SEH_EndEpilogue + RET64 +... +--- +name: push_pop_different +body: | + bb.0 (%ir-block.0): + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + frame-setup SEH_StackAlloc 8 + frame-setup SEH_EndPrologue + renamable $edx = KILL $edx, implicit-def $rdx + renamable $ecx = KILL $ecx, implicit-def $rcx + renamable $eax = LEA64_32r killed renamable $rcx, 1, killed renamable $rdx, 0, $noreg + SEH_BeginEpilogue + $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + SEH_EndEpilogue + RET64 $eax +... +--- +name: push_add +body: | + bb.0 (%ir-block.0): + + frame-setup PUSH64r killed $r15, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 126 + frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 125 + frame-setup PUSH64r killed $rsi, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 60 + frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 55 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + frame-setup SEH_PushReg 53 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + frame-setup SEH_StackAlloc 8 + frame-setup SEH_EndPrologue + $rsi = MOV64rr $rdx + renamable $r9 = MOV64rm renamable $rcx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.a, align 16) + renamable $rdi = MOV64rm renamable $rcx, 1, $noreg, 8, $noreg :: (load (s64) from %ir.a + 8, basealign 16) + renamable $rbx = MOV64rm renamable $rcx, 1, $noreg, 16, $noreg :: (load (s64) from %ir.a + 16, align 16) + renamable $r10 = MOV64rm $rdx, 1, $noreg, 16, $noreg :: (load (s64) from %ir.b + 16, align 16) + renamable $r11 = MOV64rm $rdx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.b, align 16) + renamable $r14 = MOV64rm $rdx, 1, $noreg, 8, $noreg :: (load (s64) from %ir.b + 8, basealign 16) + renamable $r15 = MOV64rm killed renamable $rcx, 1, $noreg, 24, $noreg :: (load (s64) from %ir.a + 24, basealign 16) + renamable $r15 = IMUL64rr killed renamable $r15, renamable $r11, implicit-def dead $eflags + $rax = MOV64rr $r11 + MUL64r renamable $rbx, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax + $rcx = MOV64rr $rax + renamable $rbx = IMUL64rr killed renamable $rbx, renamable $r14, implicit-def dead $eflags + renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $rdx, implicit-def dead $eflags + renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $r15, implicit-def dead $eflags + $r15 = MOV64rr $r10 + renamable $r15 = IMUL64rr killed renamable $r15, renamable $rdi, implicit-def dead $eflags + $rax = MOV64rr killed $r10 + MUL64r renamable $r9, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax + $r10 = MOV64rr $rax + renamable $rdx = ADD64rr killed renamable $rdx, killed renamable $r15, implicit-def dead $eflags + renamable $r15 = MOV64rm killed renamable $rsi, 1, $noreg, 24, $noreg :: (load (s64) from %ir.b + 24, basealign 16) + renamable $r15 = IMUL64rr killed renamable $r15, renamable $r9, implicit-def dead $eflags + renamable $r15 = ADD64rr killed renamable $r15, killed renamable $rdx, implicit-def dead $eflags + renamable $r10 = ADD64rr killed renamable $r10, killed renamable $rcx, implicit-def $eflags + renamable $r15 = ADC64rr killed renamable $r15, killed renamable $rbx, implicit-def dead $eflags, implicit killed $eflags + $rax = MOV64rr $r9 + MUL64r renamable $r11, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax + $rcx = MOV64rr $rdx + $rsi = MOV64rr $rax + $rax = MOV64rr $rdi + MUL64r killed renamable $r11, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax + $r11 = MOV64rr $rdx + $rbx = MOV64rr $rax + renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $rcx, implicit-def $eflags + renamable $r11 = ADC64ri32 killed renamable $r11, 0, implicit-def dead $eflags, implicit killed $eflags + $rax = MOV64rr killed $r9 + MUL64r renamable $r14, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax + $rcx = MOV64rr $rdx + $r9 = MOV64rr $rax + renamable $r9 = ADD64rr killed renamable $r9, killed renamable $rbx, implicit-def $eflags + renamable $rcx = ADC64rr killed renamable $rcx, killed renamable $r11, implicit-def $eflags, implicit killed $eflags + renamable $al = SETCCr 2, implicit killed $eflags + renamable $r11d = MOVZX32rr8 killed renamable $al, implicit-def $r11 + $rax = MOV64rr killed $rdi + MUL64r killed renamable $r14, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax + renamable $rax = ADD64rr killed renamable $rax, killed renamable $rcx, implicit-def $eflags + renamable $rdx = ADC64rr killed renamable $rdx, killed renamable $r11, implicit-def dead $eflags, implicit killed $eflags + renamable $rax = ADD64rr killed renamable $rax, killed renamable $r10, implicit-def $eflags + renamable $rdx = ADC64rr killed renamable $rdx, killed renamable $r15, implicit-def dead $eflags, implicit killed $eflags + MOV64mr renamable $r8, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store (s64) into %ir.out, align 16) + MOV64mr renamable $r8, 1, $noreg, 8, $noreg, killed renamable $r9 :: (store (s64) into %ir.out + 8, basealign 16) + MOV64mr renamable $r8, 1, $noreg, 16, $noreg, killed renamable $rax :: (store (s64) into %ir.out + 16, align 16) + MOV64mr killed renamable $r8, 1, $noreg, 24, $noreg, killed renamable $rdx :: (store (s64) into %ir.out + 24, basealign 16) + SEH_BeginEpilogue + $rsp = frame-destroy ADD64ri32 $rsp, 8, implicit-def dead $eflags + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $rsi = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp + $r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp + SEH_EndEpilogue + RET64 +...