355 changes: 249 additions & 106 deletions llvm/lib/Target/X86/X86FrameLowering.cpp

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions llvm/lib/Target/X86/X86FrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ class X86FrameLowering : public TargetFrameLowering {
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}

void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
unsigned FramePtr) const;
MachineBasicBlock::iterator MBBI,
DebugLoc DL) const;

/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
Expand All @@ -42,6 +42,11 @@ class X86FrameLowering : public TargetFrameLowering {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;

/// assignCalleeSavedSpillSlots - Override of the TargetFrameLowering hook of
/// the same name. Receives the function, the register info and a mutable
/// list of callee-saved register entries.
/// NOTE(review): the definition lives in X86FrameLowering.cpp, which is not
/// visible here -- confirm what the bool result signals to the caller.
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const override;

bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,9 +605,8 @@ void X86TargetLowering::resetOperationActions() {
}

// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing()) {
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}

Expand Down
24 changes: 22 additions & 2 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in

// When using segmented stacks these are lowered into instructions which first
// check if the current stacklet has enough free memory. If it does, memory is
// allocated by bumping the stack pointer. Otherwise memory is allocated from
// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.

let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
Expand Down Expand Up @@ -196,6 +196,26 @@ let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
"#EH_SjLj_Setup\t$dst", []>;
}

//===----------------------------------------------------------------------===//
// Pseudo instructions used by unwind info.
//
// Every def below has no outputs and only immediate inputs; its asm string is
// a '#'-prefixed placeholder. X86AsmPrinter::EmitInstruction
// (X86MCInstLower.cpp) handles these opcodes by calling the matching
// EmitWin64EH* method on the output streamer.
//
let isPseudo = 1 in {
// $reg: register pushed in the prologue -> EmitWin64EHPushReg.
def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
"#SEH_PushReg $reg", []>;
// $reg saved at $dst -> EmitWin64EHSaveReg.
def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
"#SEH_SaveReg $reg, $dst", []>;
// Same operand layout as SEH_SaveReg, for XMM registers -> EmitWin64EHSaveXMM.
def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
"#SEH_SaveXMM $reg, $dst", []>;
// $size: stack allocation amount -> EmitWin64EHAllocStack.
def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
"#SEH_StackAlloc $size", []>;
// $reg is the frame register, $offset its offset -> EmitWin64EHSetFrame.
def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
"#SEH_SetFrame $reg, $offset", []>;
// $mode is a 1-bit flag passed straight to EmitWin64EHPushFrame.
def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
"#SEH_PushFrame $mode", []>;
// No operands; marks the end of the prologue -> EmitWin64EHEndProlog.
def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
"#SEH_EndPrologue", []>;
}

//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
Expand Down Expand Up @@ -371,7 +391,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
Requires<[In64BitMode]>;

let Uses = [RAX,RCX,RDI] in
def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
Expand Down
37 changes: 37 additions & 0 deletions llvm/lib/Target/X86/X86MCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//

#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/SmallString.h"
Expand Down Expand Up @@ -779,6 +780,9 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,

void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI =
static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());

switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
Expand Down Expand Up @@ -883,6 +887,39 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(X86::R10)
.addReg(X86::RAX));
return;

// Win64 SEH unwind pseudos. Each case forwards the pseudo's immediate
// operands to the corresponding OutStreamer.EmitWin64EH* call and returns,
// so none of them reaches the generic lowering that follows the switch.
// Register operands are carried as immediates and translated with
// RI->getSEHRegNum() before being handed to the streamer.
case X86::SEH_PushReg:
OutStreamer.EmitWin64EHPushReg(
RI->getSEHRegNum(MI->getOperand(0).getImm()));
return;

// Operand 0: register (translated); operand 1: passed through unchanged.
case X86::SEH_SaveReg:
OutStreamer.EmitWin64EHSaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;

// Same operand handling as SEH_SaveReg, routed to the XMM variant.
case X86::SEH_SaveXMM:
OutStreamer.EmitWin64EHSaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;

// Operand 0 is the allocation size; no register translation is involved.
case X86::SEH_StackAlloc:
OutStreamer.EmitWin64EHAllocStack(MI->getOperand(0).getImm());
return;

// Operand 0: frame register (translated); operand 1: offset, passed through.
case X86::SEH_SetFrame:
OutStreamer.EmitWin64EHSetFrame(
RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;

// Operand 0 is the mode flag, passed through as-is.
case X86::SEH_PushFrame:
OutStreamer.EmitWin64EHPushFrame(MI->getOperand(0).getImm());
return;

// Takes no operands.
case X86::SEH_EndPrologue:
OutStreamer.EmitWin64EHEndProlog();
return;
}

MCInst TmpInst;
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/2007-05-05-Personality.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN
; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN
; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN
; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN
; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64

; LIN: .cfi_personality 0, __gnat_eh_personality
; LIN: .cfi_lsda 0, .Lexception0
; WIN: .cfi_personality 0, ___gnat_eh_personality
; WIN: .cfi_lsda 0, Lexception0
; WIN64: .seh_handler __gnat_eh_personality
; WIN64: .seh_handlerdata

@error = external global i8

Expand All @@ -15,7 +17,7 @@ entry:
invoke void @raise()
to label %eh_then unwind label %unwind

unwind: ; preds = %entry
unwind: ; preds = %entry
%eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
catch i8* @error
%eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
; CHECK: subq $40, %rsp
; CHECK: movaps %xmm8, (%rsp)
; CHECK: movaps %xmm7, 16(%rsp)
; CHECK: movaps %xmm8, 16(%rsp)
; CHECK: movaps %xmm7, (%rsp)

define i32 @a() nounwind {
entry:
Expand Down
62 changes: 33 additions & 29 deletions llvm/test/CodeGen/X86/avx-intel-ocl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@ declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
declare i32 @func_int(i32, i32)

; WIN64: testf16_inp
; WIN64-LABEL: testf16_inp
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: leaq {{.*}}(%rsp), %rcx
; WIN64: call
; WIN64: ret

; X32: testf16_inp
; X32-LABEL: testf16_inp
; X32: movl %eax, (%esp)
; X32: vaddps {{.*}}, {{%ymm[0-1]}}
; X32: vaddps {{.*}}, {{%ymm[0-1]}}
; X32: call
; X32: ret

; X64: testf16_inp
; X64-LABEL: testf16_inp
; X64: vaddps {{.*}}, {{%ymm[0-1]}}
; X64: vaddps {{.*}}, {{%ymm[0-1]}}
; X64: leaq {{.*}}(%rsp), %rdi
Expand All @@ -41,14 +41,14 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
;test calling conventions - preserved registers

; preserved ymm6-ymm15
; WIN64: testf16_regs
; WIN64-LABEL: testf16_regs
; WIN64: call
; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; WIN64: ret

; preserved ymm8-ymm15
; X64: testf16_regs
; X64-LABEL: testf16_regs
; X64: call
; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
Expand All @@ -65,28 +65,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
}

; test calling conventions - prolog and epilog
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
; WIN64-LABEL: test_prolog_epilog
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: call
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload

; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload

; X64-LABEL: test_prolog_epilog
; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
Expand All @@ -111,12 +113,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl

; test functions with integer parameters
; pass parameters on stack for 32-bit platform
; X32-LABEL: test_int
; X32: movl {{.*}}, 4(%esp)
; X32: movl {{.*}}, (%esp)
; X32: call
; X32: addl {{.*}}, %eax

; pass parameters in registers for 64-bit platform
; X64-LABEL: test_int
; X64: leal {{.*}}, %edi
; X64: movl {{.*}}, %esi
; X64: call
Expand All @@ -128,21 +132,21 @@ define i32 @test_int(i32 %a, i32 %b) nounwind {
ret i32 %c
}

; WIN64: test_float4
; WIN64-LABEL: test_float4
; WIN64-NOT: vzeroupper
; WIN64: call
; WIN64-NOT: vzeroupper
; WIN64: call
; WIN64: ret

; X64: test_float4
; X64-LABEL: test_float4
; X64-NOT: vzeroupper
; X64: call
; X64-NOT: vzeroupper
; X64: call
; X64: ret

; X32: test_float4
; X32-LABEL: test_float4
; X32: vzeroupper
; X32: call
; X32: vzeroupper
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/gcc_except_table.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ define i32 @main() uwtable optsize ssp {
; APPLE: GCC_except_table0:
; APPLE: Lexception0:

; MINGW64: .cfi_startproc
; MINGW64: .cfi_personality 0, __gxx_personality_v0
; MINGW64: .cfi_lsda 0, .Lexception0
; MINGW64: .cfi_def_cfa_offset 16
; MINGW64: .seh_proc
; MINGW64: .seh_handler __gxx_personality_v0
; MINGW64: .seh_setframe 5, 0
; MINGW64: callq _Unwind_Resume
; MINGW64: .cfi_endproc
; MINGW64: .seh_handlerdata
; MINGW64: GCC_except_table0:
; MINGW64: Lexception0:
; MINGW64: .seh_endproc

; MINGW32: .cfi_startproc
; MINGW32: .cfi_personality 0, ___gxx_personality_v0
Expand Down
170 changes: 170 additions & 0 deletions llvm/test/CodeGen/X86/win64_eh.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64

; Checks a function that needs no prologue work: the expected output contains
; only the .seh_proc / .seh_endprologue / .seh_endproc markers around the ret.
define void @foo0() uwtable {
entry:
ret void
}
; WIN64-LABEL: foo0:
; WIN64: .seh_proc foo0
; WIN64: .seh_endprologue
; WIN64: ret
; WIN64: .seh_endproc

; Checks a small stack allocation: the expected output adjusts %rsp directly
; with subq and records the same amount with .seh_stackalloc.
define void @foo1() uwtable {
entry:
; [2000 x i16] is 4000 bytes.
%baz = alloca [2000 x i16], align 2
ret void
}
; WIN64-LABEL: foo1:
; WIN64: .seh_proc foo1
; WIN64: subq $4000, %rsp
; WIN64: .seh_stackalloc 4000
; WIN64: .seh_endprologue
; WIN64: addq $4000, %rsp
; WIN64: ret
; WIN64: .seh_endproc

; Checks a stack allocation requiring a call to __chkstk/___chkstk_ms: the
; expected output loads the size into %rax, calls the probe routine, and only
; then subtracts %rax from %rsp.
define void @foo2() uwtable {
entry:
; [4000 x i16] is 8000 bytes.
%baz = alloca [4000 x i16], align 2
ret void
}
; WIN64-LABEL: foo2:
; WIN64: .seh_proc foo2
; WIN64: movabsq $8000, %rax
; WIN64: callq {{__chkstk|___chkstk_ms}}
; WIN64: subq %rax, %rsp
; WIN64: .seh_stackalloc 8000
; WIN64: .seh_endprologue
; WIN64: addq $8000, %rsp
; WIN64: ret
; WIN64: .seh_endproc


; Checks a register push in the prologue: six i32 arguments are stored to
; allocas, reloaded, scaled and summed. The expected output saves %rsi with
; pushq and records it with .seh_pushreg before the stack allocation.
define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
entry:
; One stack slot per argument.
%a = alloca i32
%b = alloca i32
%c = alloca i32
%d = alloca i32
%e = alloca i32
%f = alloca i32
store i32 %a_arg, i32* %a
store i32 %b_arg, i32* %b
store i32 %c_arg, i32* %c
store i32 %d_arg, i32* %d
store i32 %e_arg, i32* %e
store i32 %f_arg, i32* %f
; Result is 2*a + 3*b + 5*c + 7*d + 11*e + 13*f.
%tmp = load i32* %a
%tmp1 = mul i32 %tmp, 2
%tmp2 = load i32* %b
%tmp3 = mul i32 %tmp2, 3
%tmp4 = add i32 %tmp1, %tmp3
%tmp5 = load i32* %c
%tmp6 = mul i32 %tmp5, 5
%tmp7 = add i32 %tmp4, %tmp6
%tmp8 = load i32* %d
%tmp9 = mul i32 %tmp8, 7
%tmp10 = add i32 %tmp7, %tmp9
%tmp11 = load i32* %e
%tmp12 = mul i32 %tmp11, 11
%tmp13 = add i32 %tmp10, %tmp12
%tmp14 = load i32* %f
%tmp15 = mul i32 %tmp14, 13
%tmp16 = add i32 %tmp13, %tmp15
ret i32 %tmp16
}
; WIN64-LABEL: foo3:
; WIN64: .seh_proc foo3
; WIN64: pushq %rsi
; WIN64: .seh_pushreg 6
; WIN64: subq $24, %rsp
; WIN64: .seh_stackalloc 24
; WIN64: .seh_endprologue
; WIN64: addq $24, %rsp
; WIN64: popq %rsi
; WIN64: ret
; WIN64: .seh_endproc


; Checks emission of the EH handler and handler data: foo4 invokes @bar with a
; cleanup landing pad whose personality is @_d_eh_personality, and the expected
; output names that personality in .seh_handler and emits .seh_handlerdata.
declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*)
declare void @_d_eh_resume_unwind(i8*)

declare i32 @bar()

; NOTE(review): attribute group #0 is not defined in the visible part of this
; test -- confirm it is intentional.
define i32 @foo4() #0 {
entry:
%step = alloca i32, align 4
store i32 0, i32* %step
%tmp = load i32* %step

; The call that may unwind into %landingpad.
%tmp1 = invoke i32 @bar()
to label %finally unwind label %landingpad

finally:
store i32 1, i32* %step
br label %endtryfinally

landingpad:
%landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality
cleanup
; Field 0 of the landing pad value is handed to the resume routine.
%tmp3 = extractvalue { i8*, i32 } %landing_pad, 0
store i32 2, i32* %step
call void @_d_eh_resume_unwind(i8* %tmp3)
unreachable

endtryfinally:
%tmp10 = load i32* %step
ret i32 %tmp10
}
; WIN64-LABEL: foo4:
; WIN64: .seh_proc foo4
; WIN64: .seh_handler _d_eh_personality, @unwind, @except
; WIN64: subq $56, %rsp
; WIN64: .seh_stackalloc 56
; WIN64: .seh_endprologue
; WIN64: addq $56, %rsp
; WIN64: ret
; WIN64: .seh_handlerdata
; WIN64: .seh_endproc


; Checks stack re-alignment and XMM spilling: the expected output sets up
; %rbp as frame register, realigns %rsp with andq $-64, and saves/restores the
; registers clobbered below.
define void @foo5() uwtable {
entry:
; The 64-byte alignment request is what the andq $-64 in the output reflects.
%s = alloca i32, align 64
; Empty asm that only declares rbx, rdi, xmm6 and xmm7 as clobbered.
call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"()
ret void
}
; WIN64-LABEL: foo5:
; WIN64: .seh_proc foo5
; WIN64: pushq %rbp
; WIN64: .seh_pushreg 5
; WIN64: movq %rsp, %rbp
; WIN64: pushq %rdi
; WIN64: .seh_pushreg 7
; WIN64: pushq %rbx
; WIN64: .seh_pushreg 3
; WIN64: andq $-64, %rsp
; WIN64: subq $128, %rsp
; WIN64: .seh_stackalloc 48
; WIN64: .seh_setframe 5, 64
; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill
; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill
; WIN64: .seh_savexmm 6, 16
; WIN64: .seh_savexmm 7, 32
; WIN64: .seh_endprologue
; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload
; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload
; WIN64: leaq -16(%rbp), %rsp
; WIN64: popq %rbx
; WIN64: popq %rdi
; WIN64: popq %rbp
; WIN64: retq
; WIN64: .seh_endproc