diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4dc2ff481785db..bac8838a0adcc7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3808,7 +3808,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, "the only memory argument"); } - if (!IsSibcall) + if (!IsSibcall && !IsMustTail) Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush, NumBytes - NumBytesToPush, dl); @@ -4093,7 +4093,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector Ops; - if (!IsSibcall && isTailCall) { + if (!IsSibcall && isTailCall && !IsMustTail) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytesToPop, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); diff --git a/llvm/test/CodeGen/X86/cfguard-checks.ll b/llvm/test/CodeGen/X86/cfguard-checks.ll index efd6e7bc11b63c..6e8d71411e8267 100644 --- a/llvm/test/CodeGen/X86/cfguard-checks.ll +++ b/llvm/test/CodeGen/X86/cfguard-checks.ll @@ -171,8 +171,7 @@ entry: ; X64-LABEL: func_cf_tail ; X64: leaq target_func(%rip), %rax - ; X64: movq __guard_dispatch_icall_fptr(%rip), %rcx - ; X64: rex64 jmpq *%rcx # TAILCALL + ; X64: rex64 jmpq *__guard_dispatch_icall_fptr(%rip) # TAILCALL ; X64-NOT: callq } @@ -205,7 +204,6 @@ entry: ; X64: movq (%rcx), %rax ; X64-NEXT: movq 8(%rax), %rax ; X64-NEXT: movq __guard_dispatch_icall_fptr(%rip), %rdx - ; X64-NEXT: addq $40, %rsp ; X64-NEXT: rex64 jmpq *%rdx # TAILCALL ; X64-NOT: callq } diff --git a/llvm/test/CodeGen/X86/musttail-inalloca.ll b/llvm/test/CodeGen/X86/musttail-inalloca.ll new file mode 100644 index 00000000000000..c17d29c4c71d3f --- /dev/null +++ b/llvm/test/CodeGen/X86/musttail-inalloca.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - | FileCheck %s + +; Previously, we would accidentally leave behind SP adjustments to setup a call +; frame for the musttail call target, and SP adjustments would end up +; unbalanced. Reported as https://crbug.com/1026882. + +target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc19.16.0" + +; 20 bytes of memory. +%struct.Args = type { i32, i32, i32, i32, i32 } + +declare dso_local x86_thiscallcc void @methodWithVtorDisp(i8* nocapture readonly, <{ %struct.Args }>* inalloca) + +define dso_local x86_thiscallcc void @methodWithVtorDisp_thunk(i8* %0, <{ %struct.Args }>* inalloca %1) #0 { +; CHECK-LABEL: methodWithVtorDisp_thunk: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: subl -4(%ecx), %esi +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl $_methodWithVtorDisp_thunk +; CHECK-NEXT: calll ___cyg_profile_func_exit +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: popl %esi +; CHECK-NEXT: jmp _methodWithVtorDisp # TAILCALL + %3 = getelementptr inbounds i8, i8* %0, i32 -4 + %4 = bitcast i8* %3 to i32* + %5 = load i32, i32* %4, align 4 + %6 = sub i32 0, %5 + %7 = getelementptr i8, i8* %0, i32 %6 + musttail call x86_thiscallcc void @methodWithVtorDisp(i8* %7, <{ %struct.Args }>* inalloca nonnull %1) + ret void +} + +attributes #0 = { nounwind optsize "instrument-function-exit-inlined"="__cyg_profile_func_exit" } diff --git a/llvm/test/CodeGen/X86/musttail-tailcc.ll b/llvm/test/CodeGen/X86/musttail-tailcc.ll index 6057045a77dfa9..4de8d4e2be1289 100644 --- a/llvm/test/CodeGen/X86/musttail-tailcc.ll +++ b/llvm/test/CodeGen/X86/musttail-tailcc.ll @@ -9,8 +9,6 @@ declare tailcc i32 @tailcallee(i32 %a1, i32 %a2) define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { ; X64-LABEL: tailcaller: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rax -; X64-NEXT: popq %rax ; X64-NEXT: jmp tailcallee # TAILCALL ; ; X32-LABEL: tailcaller: @@ -26,8 +24,6 @@ declare tailcc i8* @alias_callee() define tailcc noalias i8* @noalias_caller() nounwind { ; X64-LABEL: noalias_caller: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: popq %rax ; X64-NEXT: jmp alias_callee # TAILCALL ; ; X32-LABEL: noalias_caller: @@ -42,8 +38,6 @@ declare tailcc noalias i8* @noalias_callee() define tailcc i8* @alias_caller() nounwind { ; X64-LABEL: alias_caller: ; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: popq %rax ; X64-NEXT: jmp noalias_callee # TAILCALL ; ; X32-LABEL: alias_caller: @@ -56,25 +50,17 @@ define tailcc i8* @alias_caller() nounwind { define tailcc void @void_test(i32, i32, i32, i32) { ; X64-LABEL: void_test: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rax -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: popq %rax -; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: jmp void_test # TAILCALL ; ; X32-LABEL: void_test: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: .cfi_offset %esi, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: addl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: jmp void_test # TAILCALL @@ -86,25 +72,17 @@ define tailcc void @void_test(i32, i32, i32, i32) { define tailcc i1 @i1test(i32, i32, i32, i32) { ; X64-LABEL: i1test: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rax -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: popq %rax -; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: jmp i1test # TAILCALL ; ; X32-LABEL: i1test: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi ; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: .cfi_offset %esi, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X32-NEXT: addl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: jmp i1test # TAILCALL diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll index 5f5ce5f9af9135..5d88f0dd11cd63 100644 --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -246,12 +246,13 @@ define void @f_thunk(i8* %this, ...) { ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-16, %esp -; X86-NOSSE-NEXT: subl $48, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %esi ; X86-NOSSE-NEXT: leal 12(%ebp), %eax -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %esi, (%esp) +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: calll _get_f +; X86-NOSSE-NEXT: addl $4, %esp ; X86-NOSSE-NEXT: movl %esi, 8(%ebp) ; X86-NOSSE-NEXT: leal -4(%ebp), %esp ; X86-NOSSE-NEXT: popl %esi @@ -264,17 +265,18 @@ define void @f_thunk(i8* %this, ...) { ; X86-SSE-NEXT: movl %esp, %ebp ; X86-SSE-NEXT: pushl %esi ; X86-SSE-NEXT: andl $-16, %esp -; X86-SSE-NEXT: subl $96, %esp +; X86-SSE-NEXT: subl $80, %esp ; X86-SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X86-SSE-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X86-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-SSE-NEXT: movaps %xmm0, (%esp) # 16-byte Spill ; X86-SSE-NEXT: movl 8(%ebp), %esi ; X86-SSE-NEXT: leal 12(%ebp), %eax ; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %esi, (%esp) +; X86-SSE-NEXT: pushl %esi ; X86-SSE-NEXT: calll _get_f +; X86-SSE-NEXT: addl $4, %esp ; X86-SSE-NEXT: movl %esi, 8(%ebp) -; X86-SSE-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-SSE-NEXT: movaps (%esp), %xmm0 # 16-byte Reload ; X86-SSE-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload ; X86-SSE-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload ; X86-SSE-NEXT: leal -4(%ebp), %esp @@ -300,38 +302,21 @@ define void @f_thunk(i8* %this, ...) { define void @g_thunk(i8* %fptr_i8, ...) { ; LINUX-LABEL: g_thunk: ; LINUX: # %bb.0: -; LINUX-NEXT: pushq %rax -; LINUX-NEXT: .cfi_def_cfa_offset 16 -; LINUX-NEXT: popq %r11 -; LINUX-NEXT: .cfi_def_cfa_offset 8 ; LINUX-NEXT: jmpq *%rdi # TAILCALL ; ; LINUX-X32-LABEL: g_thunk: ; LINUX-X32: # %bb.0: -; LINUX-X32-NEXT: pushq %rax -; LINUX-X32-NEXT: .cfi_def_cfa_offset 16 ; LINUX-X32-NEXT: movl %edi, %r11d -; LINUX-X32-NEXT: addl $8, %esp -; LINUX-X32-NEXT: .cfi_def_cfa_offset 8 ; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL ; ; WINDOWS-LABEL: g_thunk: ; WINDOWS: # %bb.0: -; WINDOWS-NEXT: subq $40, %rsp -; WINDOWS-NEXT: .seh_stackalloc 40 -; WINDOWS-NEXT: .seh_endprologue -; WINDOWS-NEXT: addq $40, %rsp ; WINDOWS-NEXT: rex64 jmpq *%rcx # TAILCALL -; WINDOWS-NEXT: .seh_handlerdata -; WINDOWS-NEXT: .text -; WINDOWS-NEXT: .seh_endproc ; ; X86-LABEL: g_thunk: ; X86: # %bb.0: -; X86-NEXT: pushl %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: popl %ecx ; X86-NEXT: jmpl *%eax # TAILCALL %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)* musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...) @@ -347,78 +332,53 @@ define void @g_thunk(i8* %fptr_i8, ...) { define void @h_thunk(%struct.Foo* %this, ...) { ; LINUX-LABEL: h_thunk: ; LINUX: # %bb.0: -; LINUX-NEXT: pushq %rax -; LINUX-NEXT: .cfi_def_cfa_offset 16 ; LINUX-NEXT: cmpb $1, (%rdi) ; LINUX-NEXT: jne .LBB2_2 ; LINUX-NEXT: # %bb.1: # %then ; LINUX-NEXT: movq 8(%rdi), %r11 -; LINUX-NEXT: addq $8, %rsp -; LINUX-NEXT: .cfi_def_cfa_offset 8 ; LINUX-NEXT: jmpq *%r11 # TAILCALL ; LINUX-NEXT: .LBB2_2: # %else -; LINUX-NEXT: .cfi_def_cfa_offset 16 ; LINUX-NEXT: movq 16(%rdi), %r11 ; LINUX-NEXT: movl $42, {{.*}}(%rip) -; LINUX-NEXT: addq $8, %rsp -; LINUX-NEXT: .cfi_def_cfa_offset 8 ; LINUX-NEXT: jmpq *%r11 # TAILCALL ; ; LINUX-X32-LABEL: h_thunk: ; LINUX-X32: # %bb.0: -; LINUX-X32-NEXT: pushq %rax -; LINUX-X32-NEXT: .cfi_def_cfa_offset 16 ; LINUX-X32-NEXT: cmpb $1, (%edi) ; LINUX-X32-NEXT: jne .LBB2_2 ; LINUX-X32-NEXT: # %bb.1: # %then ; LINUX-X32-NEXT: movl 4(%edi), %r11d -; LINUX-X32-NEXT: addl $8, %esp -; LINUX-X32-NEXT: .cfi_def_cfa_offset 8 ; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL ; LINUX-X32-NEXT: .LBB2_2: # %else -; LINUX-X32-NEXT: .cfi_def_cfa_offset 16 ; LINUX-X32-NEXT: movl 8(%edi), %r11d ; LINUX-X32-NEXT: movl $42, {{.*}}(%rip) -; LINUX-X32-NEXT: addl $8, %esp -; LINUX-X32-NEXT: .cfi_def_cfa_offset 8 ; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL ; ; WINDOWS-LABEL: h_thunk: ; WINDOWS: # %bb.0: -; WINDOWS-NEXT: subq $40, %rsp -; WINDOWS-NEXT: .seh_stackalloc 40 -; WINDOWS-NEXT: .seh_endprologue ; WINDOWS-NEXT: cmpb $1, (%rcx) ; WINDOWS-NEXT: jne .LBB2_2 ; WINDOWS-NEXT: # %bb.1: # %then ; WINDOWS-NEXT: movq 8(%rcx), %rax -; WINDOWS-NEXT: addq $40, %rsp ; WINDOWS-NEXT: rex64 jmpq *%rax # TAILCALL ; WINDOWS-NEXT: .LBB2_2: # %else ; WINDOWS-NEXT: movq 16(%rcx), %rax ; WINDOWS-NEXT: movl $42, {{.*}}(%rip) -; WINDOWS-NEXT: addq $40, %rsp ; WINDOWS-NEXT: rex64 jmpq *%rax # TAILCALL -; WINDOWS-NEXT: .seh_handlerdata -; WINDOWS-NEXT: .text -; WINDOWS-NEXT: .seh_endproc ; ; X86-LABEL: h_thunk: ; X86: # %bb.0: -; X86-NEXT: pushl %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpb $1, (%eax) ; X86-NEXT: jne LBB2_2 ; X86-NEXT: # %bb.1: # %then ; X86-NEXT: movl 4(%eax), %ecx ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: popl %eax ; X86-NEXT: jmpl *%ecx # TAILCALL ; X86-NEXT: LBB2_2: # %else ; X86-NEXT: movl 8(%eax), %ecx ; X86-NEXT: movl $42, _g ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: popl %eax ; X86-NEXT: jmpl *%ecx # TAILCALL %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0 %cond = load i1, i1* %cond_p