82 changes: 62 additions & 20 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2691,6 +2691,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
MVT PtrVT = getPointerTy(DAG.getDataLayout());

// If value is passed by pointer we have address passed instead of the value
// itself. No need to extend if the mask value and location share the same
Expand Down Expand Up @@ -2729,30 +2730,71 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);

// Set SExt or ZExt flag.
if (VA.getLocInfo() == CCValAssign::ZExt) {
MFI.setObjectZExt(FI, true);
} else if (VA.getLocInfo() == CCValAssign::SExt) {
MFI.setObjectSExt(FI, true);
return DAG.getFrameIndex(FI, PtrVT);
}

// This is an argument in memory. We might be able to perform copy elision.
if (Flags.isCopyElisionCandidate()) {
EVT ArgVT = Ins[i].ArgVT;
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
// If this is a one-part value or the first part of a multi-part value,
// create a stack object for the entire argument value type and return a
// load from our portion of it. This assumes that if the first part of an
// argument is in memory, the rest will also be in memory.
int FI = MFI.CreateFixedObject(ArgVT.getSizeInBits() / 8,
VA.getLocMemOffset(), /*Immutable=*/false);
PartAddr = DAG.getFrameIndex(FI, PtrVT);
return DAG.getLoad(
ValVT, dl, Chain, PartAddr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
// This is not the first piece of an argument in memory. See if there is
// already a fixed stack object including this offset. If so, assume it
// was created by the PartOffset == 0 branch above and create a load from
// the appropriate offset into it.
int64_t PartBegin = VA.getLocMemOffset();
int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
int FI = MFI.getObjectIndexBegin();
for (; MFI.isFixedObjectIndex(FI); ++FI) {
int64_t ObjBegin = MFI.getObjectOffset(FI);
int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
break;
}
if (MFI.isFixedObjectIndex(FI)) {
SDValue Addr =
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
return DAG.getLoad(
ValVT, dl, Chain, Addr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
Ins[i].PartOffset));
}
}
}

// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
VA.getLocMemOffset(), isImmutable);

SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
return ExtendedInMem ?
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
// Set SExt or ZExt flag.
if (VA.getLocInfo() == CCValAssign::ZExt) {
MFI.setObjectZExt(FI, true);
} else if (VA.getLocInfo() == CCValAssign::SExt) {
MFI.setObjectSExt(FI, true);
}

// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
: Val;
}

// FIXME: Get this from tablegen.
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; rdar://13625505
; Here we have 9 fixed integer arguments; the 9th argument is on the stack, and
; the varargs start right after it at 8-byte alignment.
define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
; CHECK-LABEL: fn9:
; 9th fixed argument
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
Expand All @@ -30,7 +30,6 @@ define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
%a10 = alloca i32, align 4
%a11 = alloca i32, align 4
%a12 = alloca i32, align 4
store i32 %a1, i32* %1, align 4
store i32 %a2, i32* %2, align 4
store i32 %a3, i32* %3, align 4
store i32 %a4, i32* %4, align 4
Expand All @@ -39,6 +38,7 @@ define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
store i32 %a7, i32* %7, align 4
store i32 %a8, i32* %8, align 4
store i32 %a9, i32* %9, align 4
store i32 %a9, i32* %a1
%10 = bitcast i8** %args to i8*
call void @llvm.va_start(i8* %10)
%11 = va_arg i8** %args, i32
Expand Down Expand Up @@ -93,7 +93,7 @@ define i32 @main() nounwind ssp {
%10 = load i32, i32* %a10, align 4
%11 = load i32, i32* %a11, align 4
%12 = load i32, i32* %a12, align 4
call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
call void (i32*, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32* %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
ret i32 0
}

Expand Down
61 changes: 61 additions & 0 deletions llvm/test/CodeGen/ARM/arg-copy-elide.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s

declare arm_aapcscc void @addrof_i32(i32*)
declare arm_aapcscc void @addrof_i64(i64*)

; Baseline case: the only named argument, %x (the fifth i32), is stored to an
; alloca whose address is then passed to addrof_i32. The expectations that
; follow this function look for an sp-relative address (sp + 8) being placed in
; r0 before the call.
define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) {
entry:
%x.addr = alloca i32
store i32 %x, i32* %x.addr
call void @addrof_i32(i32* %x.addr)
ret void
}

; CHECK-LABEL: simple:
; CHECK: push {r11, lr}
; CHECK: add r0, sp, #8
; CHECK: bl addrof_i32
; CHECK: pop {r11, pc}


; We need to load %x before calling addrof_i32 now because it could mutate %x in
; place.

; Like @simple, but %x is also returned after the call. The return uses the SSA
; value %x, not a reload from %x.addr, so the expected code reads the argument
; from [sp, #8] into a saved register before the call and moves that register
; into r0 afterwards.
define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) {
entry:
%x.addr = alloca i32
store i32 %x, i32* %x.addr
call void @addrof_i32(i32* %x.addr)
ret i32 %x
}

; CHECK-LABEL: use_arg:
; CHECK: push {[[csr:[^ ]*]], lr}
; CHECK: ldr [[csr]], [sp, #8]
; CHECK: add r0, sp, #8
; CHECK: bl addrof_i32
; CHECK: mov r0, [[csr]]
; CHECK: pop {[[csr]], pc}


; i64 argument following four unnamed i32 arguments, stored to an alloca with
; 4-byte alignment and returned after the call. The expectations that follow
; show both halves loaded from [sp, #24] / [sp, #28], written to a local slot at
; sp, and the address of that local slot (mov r0, sp) passed to addrof_i64.
define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) {
entry:
%x.addr = alloca i64, align 4
store i64 %x, i64* %x.addr, align 4
call void @addrof_i64(i64* %x.addr)
ret i64 %x
}

; CHECK-LABEL: split_i64:
; CHECK: push {r4, r5, r11, lr}
; CHECK: sub sp, sp, #8
; CHECK: ldr r4, [sp, #28]
; CHECK: ldr r5, [sp, #24]
; CHECK: mov r0, sp
; CHECK: str r4, [sp, #4]
; CHECK: str r5, [sp]
; CHECK: bl addrof_i64
; CHECK: mov r0, r5
; CHECK: mov r1, r4
; CHECK: add sp, sp, #8
; CHECK: pop {r4, r5, r11, pc}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Mips/o32_cc_vararg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,8 @@ entry:
ret i32 %tmp

; CHECK-LABEL: va9:
; CHECK: addiu $sp, $sp, -32
; CHECK: lw $2, 52($sp)
; CHECK: addiu $sp, $sp, -24
; CHECK: lw $2, 44($sp)
}

; double
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ target triple = "i386-apple-darwin10.0.0"
@.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1]
@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]

; Verify that %esi gets spilled before the call.
; Verify that %s1 gets spilled before the call.
; CHECK: Z4test1SiS
; CHECK: movl %esi,{{.*}}(%ebp)
; CHECK: leal 8(%ebp), %[[reg:[^ ]*]]
; CHECK: movl %[[reg]],{{.*}}(%ebp) ## 4-byte Spill
; CHECK: calll __Z6throwsv

define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
Expand Down
280 changes: 280 additions & 0 deletions llvm/test/CodeGen/X86/arg-copy-elide.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s

declare void @addrof_i32(i32*)
declare void @addrof_i64(i64*)
declare void @addrof_i128(i128*)
declare void @addrof_i32_x3(i32*, i32*, i32*)

; Baseline case: %x is stored to an alloca whose address escapes to
; addrof_i32. The expectations that follow this function look for the address
; 4(%esp) being computed with leal and pushed as the call argument.
define void @simple(i32 %x) {
entry:
%x.addr = alloca i32
store i32 %x, i32* %x.addr
call void @addrof_i32(i32* %x.addr)
ret void
}

; CHECK-LABEL: _simple:
; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl


; We need to load %x before calling addrof_i32 now because it could mutate %x in
; place.

; %x is both stored to an escaping alloca and returned. The return uses the SSA
; value %x rather than a reload from %x.addr, so the expected code reads the
; argument (movl 8(%esp)) into a saved register as well as taking its address
; (leal 8(%esp)), and returns the register's value in %eax after the call.
define i32 @use_arg(i32 %x) {
entry:
%x.addr = alloca i32
store i32 %x, i32* %x.addr
call void @addrof_i32(i32* %x.addr)
ret i32 %x
}

; CHECK-LABEL: _use_arg:
; CHECK: pushl %[[csr:[^ ]*]]
; CHECK-DAG: movl 8(%esp), %[[csr]]
; CHECK-DAG: leal 8(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: movl %[[csr]], %eax
; CHECK: popl %[[csr]]
; CHECK: retl


; i64 argument stored to an alloca with 4-byte alignment and returned after the
; call. The expectations that follow show the two 32-bit halves read from
; 8(%ebp) and 12(%ebp), and the address 8(%ebp) passed to addrof_i64.
define i64 @split_i64(i64 %x) {
entry:
%x.addr = alloca i64, align 4
store i64 %x, i64* %x.addr, align 4
call void @addrof_i64(i64* %x.addr)
ret i64 %x
}

; CHECK-LABEL: _split_i64:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK: pushl %[[csr2:[^ ]*]]
; CHECK: pushl %[[csr1:[^ ]*]]
; CHECK: andl $-8, %esp
; CHECK-DAG: movl 8(%ebp), %[[csr1]]
; CHECK-DAG: movl 12(%ebp), %[[csr2]]
; CHECK-DAG: leal 8(%ebp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i64
; CHECK-DAG: movl %[[csr1]], %eax
; CHECK-DAG: movl %[[csr2]], %edx
; CHECK: leal -8(%ebp), %esp
; CHECK: popl %[[csr1]]
; CHECK: popl %[[csr2]]
; CHECK: popl %ebp
; CHECK: retl


; We can't copy elide when an i64 is split between registers and memory in a
; fastcc function.

; fastcc variant of @split_i64 with a leading pointer argument. The
; expectations that follow show one half of %x arriving in %edx and the other
; at 8(%ebp); both halves are written to a local slot at (%esp) / 4(%esp) and
; the address of that local slot is what gets passed to addrof_i64.
define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
entry:
%x.addr = alloca i64, align 4
store i64 %x, i64* %x.addr, align 4
call void @addrof_i64(i64* %x.addr)
ret i64 %x
}

; CHECK-LABEL: _fastcc_split_i64:
; CHECK: pushl %ebp
; CHECK: movl %esp, %ebp
; CHECK-DAG: movl %edx, %[[r1:[^ ]*]]
; CHECK-DAG: movl 8(%ebp), %[[r2:[^ ]*]]
; CHECK-DAG: movl %[[r2]], 4(%esp)
; CHECK-DAG: movl %[[r1]], (%esp)
; CHECK: movl %esp, %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i64
; CHECK: popl %ebp
; CHECK: retl


; We can't copy elide when it would reduce the user requested alignment.

; The alloca requests 128-byte alignment. The expectations that follow show the
; stack being realigned (andl $-128, %esp), the argument copied from 8(%ebp)
; into (%esp), and the address held in %esp passed to addrof_i32.
define void @high_alignment(i32 %x) {
entry:
%x.p = alloca i32, align 128
store i32 %x, i32* %x.p
call void @addrof_i32(i32* %x.p)
ret void
}

; CHECK-LABEL: _high_alignment:
; CHECK: andl $-128, %esp
; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
; CHECK: movl %[[reg]], (%esp)
; CHECK: movl %esp, %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl


; We can't copy elide when it would reduce the ABI required alignment.
; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
; doesn't guarantee it.

; The alloca and store carry no explicit alignment. The expectations that
; follow show the stack realigned to 8 bytes (andl $-8, %esp), a copy from
; 8(%ebp) into (%esp), and the address held in %esp passed to addrof_i64.
define void @abi_alignment(i64 %x) {
entry:
%x.p = alloca i64
store i64 %x, i64* %x.p
call void @addrof_i64(i64* %x.p)
ret void
}

; CHECK-LABEL: _abi_alignment:
; CHECK: andl $-8, %esp
; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
; CHECK: movl %[[reg]], (%esp)
; CHECK: movl %esp, %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i64
; CHECK: retl


; The code we generate for this is unimportant. This is mostly a crash test.

; i128 argument stored to an escaping alloca and, after the call, stored
; through the pointer argument %sret. The expectations that follow only match
; the label, a pushl %ebp, the call, and the return.
define void @split_i128(i128* %sret, i128 %x) {
entry:
%x.addr = alloca i128
store i128 %x, i128* %x.addr
call void @addrof_i128(i128* %x.addr)
store i128 %x, i128* %sret
ret void
}

; CHECK-LABEL: _split_i128:
; CHECK: pushl %ebp
; CHECK: calll _addrof_i128
; CHECK: retl


; Check that we load all of x, y, and z before the call.

; Three i32 arguments, each stored to its own alloca. The allocas and stores
; are written in z, y, x order, the reverse of the parameter order. All three
; addresses escape in a single call, and the SSA values %x, %y, %z are summed
; after it. The expectations that follow show the sum built from three
; %esp-relative memory operands in a register that is pushed on entry, the
; addresses 8/12/16(%esp) pushed as call arguments, and the sum returned in %eax.
define i32 @three_args(i32 %x, i32 %y, i32 %z) {
entry:
%z.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %z, i32* %z.addr, align 4
store i32 %y, i32* %y.addr, align 4
store i32 %x, i32* %x.addr, align 4
call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
%s1 = add i32 %x, %y
%sum = add i32 %s1, %z
ret i32 %sum
}

; CHECK-LABEL: _three_args:
; CHECK: pushl %[[csr:[^ ]*]]
; CHECK-DAG: movl {{[0-9]+}}(%esp), %[[csr]]
; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
; CHECK-DAG: leal 8(%esp), %[[x:[^ ]*]]
; CHECK-DAG: leal 12(%esp), %[[y:[^ ]*]]
; CHECK-DAG: leal 16(%esp), %[[z:[^ ]*]]
; CHECK: pushl %[[z]]
; CHECK: pushl %[[y]]
; CHECK: pushl %[[x]]
; CHECK: calll _addrof_i32_x3
; CHECK: movl %[[csr]], %eax
; CHECK: popl %[[csr]]
; CHECK: retl


; Two different arguments are stored to the same alloca; %y is written last, so
; it is the value addrof_i32 must observe. The expectations that follow show a
; load from 8(%esp), a store to 4(%esp), and the address 4(%esp) passed on.
define void @two_args_same_alloca(i32 %x, i32 %y) {
entry:
%x.addr = alloca i32
store i32 %x, i32* %x.addr
store i32 %y, i32* %x.addr
call void @addrof_i32(i32* %x.addr)
ret void
}

; CHECK-LABEL: _two_args_same_alloca:
; CHECK: movl 8(%esp), {{.*}}
; CHECK: movl {{.*}}, 4(%esp)
; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl


; %x is a byval pointer argument. The pointer value itself is stored to
; %x.p.p, which is not otherwise used, and %x is passed straight to
; addrof_i32. The expectations that follow look for an %esp-relative address
; computed with leal and pushed as the call argument.
define void @avoid_byval(i32* byval %x) {
entry:
%x.p.p = alloca i32*
store i32* %x, i32** %x.p.p
call void @addrof_i32(i32* %x)
ret void
}

; CHECK-LABEL: _avoid_byval:
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl


; Same shape as @avoid_byval, with an inalloca pointer argument instead. The
; pointer value is stored to the otherwise unused %x.p.p and %x is passed to
; addrof_i32. The expectations that follow look for an %esp-relative address
; computed with leal and pushed as the call argument.
define void @avoid_inalloca(i32* inalloca %x) {
entry:
%x.p.p = alloca i32*
store i32* %x, i32** %x.p.p
call void @addrof_i32(i32* %x)
ret void
}

; CHECK-LABEL: _avoid_inalloca:
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl


; Don't elide the copy when the alloca is escaped with a store.

; The address of %x1 is written to memory (%x2), reloaded as %x3, and a zero is
; stored through it before %x is stored to %x1. The expectations that follow
; show both the constant 0 and the argument value being stored to the same
; %esp-relative offset before the call.
define void @escape_with_store(i32 %x) {
%x1 = alloca i32
%x2 = alloca i32*
store i32* %x1, i32** %x2
%x3 = load i32*, i32** %x2
store i32 0, i32* %x3
store i32 %x, i32* %x1
call void @addrof_i32(i32* %x1)
ret void
}

; CHECK-LABEL: _escape_with_store:
; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
; CHECK: movl %[[reg]], [[offs]](%esp)
; CHECK: calll _addrof_i32


; This test case exposed issues with the use of TokenFactor.

; Combines an sret pointer argument with an argument whose alloca escapes; %v
; is stored through %sret only after the call. The expectations that follow
; show %sret read from 12(%esp), %v read from 16(%esp), the address 16(%esp)
; passed to addrof_i32, then %v stored through the sret pointer, which is also
; returned in %eax.
define void @sret_and_elide(i32* sret %sret, i32 %v) {
%v.p = alloca i32
store i32 %v, i32* %v.p
call void @addrof_i32(i32* %v.p)
store i32 %v, i32* %sret
ret void
}

; CHECK-LABEL: _sret_and_elide:
; CHECK: pushl
; CHECK: pushl
; CHECK: movl 12(%esp), %[[sret:[^ ]*]]
; CHECK: movl 16(%esp), %[[v:[^ ]*]]
; CHECK: leal 16(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: movl %[[v]], (%[[sret]])
; CHECK: movl %[[sret]], %eax
; CHECK: popl
; CHECK: popl
; CHECK: retl
30 changes: 13 additions & 17 deletions llvm/test/CodeGen/X86/inline-asm-tied.ll
Original file line number Diff line number Diff line change
@@ -1,31 +1,27 @@
; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic -no-integrated-as | FileCheck %s
; rdar://6992609

; CHECK: movl %ecx, 4([[ESP:%e..]])
; CHECK: movl 4([[ESP]]), [[EDX:%e..]]
; CHECK: movl [[EDX]], 4([[ESP]])
target triple = "i386-apple-darwin9.0"
@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]

; Byte-swaps a 64-bit value with inline asm. The "=A" output is tied to the
; input through the "0" constraint, so the value enters and leaves the asm in
; the same register pair.
define i64 @_OSSwapInt64(i64 %_data) nounwind {
entry:
%retval = alloca i64 ; <i64*> [#uses=2]
%_data.addr = alloca i64 ; <i64*> [#uses=4]
store i64 %_data, i64* %_data.addr
%tmp = load i64, i64* %_data.addr ; <i64> [#uses=1]
%0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind ; <i64> [#uses=1]
store i64 %0, i64* %_data.addr
%tmp1 = load i64, i64* %_data.addr ; <i64> [#uses=1]
store i64 %tmp1, i64* %retval
%1 = load i64, i64* %retval ; <i64> [#uses=1]
ret i64 %1
; In an IR asm template '%' is a literal character ('$' introduces operands),
; so register names take a single '%'; a doubled '%' would be emitted verbatim.
%0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %_data) nounwind
ret i64 %0
}

; CHECK-LABEL: __OSSwapInt64:
; CHECK-DAG: movl 8(%esp), %edx
; CHECK-DAG: movl 4(%esp), %eax
; CHECK: ## InlineAsm Start
; CHECK: ## InlineAsm End
; Everything is set up in EAX:EDX, return immediately.
; CHECK-NEXT: retl

; The tied operands are not necessarily in the same order as the defs.
; PR13742
define i64 @swapped(i64 %x, i64 %y) nounwind {
entry:
%x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
%x1 = extractvalue { i64, i64 } %x0, 0
ret i64 %x1
%x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
%x1 = extractvalue { i64, i64 } %x0, 0
ret i64 %x1
}
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/X86/pr30430.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,6 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm8, (%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
Expand All @@ -46,14 +38,14 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
Expand All @@ -62,14 +54,14 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
Expand Down Expand Up @@ -104,11 +96,19 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: # implicit-def: %YMM3
; CHECK-NEXT: vmovaps %xmm1, %xmm3
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
; CHECK-NEXT: # implicit-def: %ZMM16
; CHECK-NEXT: vmovaps %zmm3, %zmm16
; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm16, %zmm16
; CHECK-NEXT: vmovaps %zmm16, {{[0-9]+}}(%rsp)
; CHECK-NEXT: # implicit-def: %ZMM24
; CHECK-NEXT: vmovaps %zmm3, %zmm24
; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) # 4-byte Spill
; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
Expand Down
8 changes: 2 additions & 6 deletions llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1653,12 +1653,8 @@ define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
define void @test_mm_setcsr(i32 %a0) nounwind {
; X32-LABEL: test_mm_setcsr:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %esp, %ecx
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax
; X32-NEXT: leal 4(%esp), %eax
; X32-NEXT: ldmxcsr (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setcsr:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/DebugInfo/X86/discriminator.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe

; CHECK: Address Line Column File ISA Discriminator Flags
; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
; CHECK: 0x0000000000000011 2 0 1 0 42 {{$}}
; CHECK: 0x000000000000000a 2 0 1 0 42 {{$}}