diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 43b5a20006ba7..92d1bd1bb0252 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3851,13 +3851,10 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, EVT ArgVT = Ins[i].ArgVT; - // If this is a vector that has been split into multiple parts, and the - // scalar size of the parts don't match the vector element size, then we can't - // elide the copy. The parts will have padding between them instead of being - // packed like a vector. - bool ScalarizedAndExtendedVector = - ArgVT.isVector() && !VA.getLocVT().isVector() && - VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits(); + // If this is a vector that has been split into multiple parts, don't elide + // the copy. The layout on the stack may not match the packed in-memory + // layout. + bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector(); // This is an argument in memory. We might be able to perform copy elision. // If the argument is passed directly in memory without any extension, then we @@ -3865,7 +3862,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, // indirectly by pointer. if (Flags.isCopyElisionCandidate() && VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && - !ScalarizedAndExtendedVector) { + !ScalarizedVector) { SDValue PartAddr; if (Ins[i].PartOffset == 0) { // If this is a one-part value or the first part of a multi-part value, diff --git a/llvm/test/CodeGen/X86/pr63475.ll b/llvm/test/CodeGen/X86/pr63475.ll index d4b7a7cacefda..0052688b5aa13 100644 --- a/llvm/test/CodeGen/X86/pr63475.ll +++ b/llvm/test/CodeGen/X86/pr63475.ll @@ -27,7 +27,8 @@ define void @caller() nounwind { ret void } -; FIXME: This is a miscompile. +; Make sure the stack offsets are correct. The distance between them should +; be 8, not 4. define void @callee(ptr %p0, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, <7 x i32> %arg) nounwind { ; CHECK-LABEL: callee: ; CHECK: # %bb.0: # %start @@ -37,28 +38,41 @@ define void @callee(ptr %p0, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, <7 x i ; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl 112(%rsp), %ebx -; CHECK-NEXT: movl 104(%rsp), %ebp -; CHECK-NEXT: movl 96(%rsp), %r14d -; CHECK-NEXT: movl 76(%rsp), %r15d -; CHECK-NEXT: movl 72(%rsp), %r12d -; CHECK-NEXT: movl 64(%rsp), %edi -; CHECK-NEXT: movl 68(%rsp), %r13d -; CHECK-NEXT: callq use@PLT -; CHECK-NEXT: movl %r13d, %edi -; CHECK-NEXT: callq use@PLT -; CHECK-NEXT: movl %r12d, %edi +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: movl 120(%rsp), %ebx +; CHECK-NEXT: movd %ebx, %xmm0 +; CHECK-NEXT: movl 112(%rsp), %ebp +; CHECK-NEXT: movd %ebp, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: movl 104(%rsp), %r15d +; CHECK-NEXT: movd %r15d, %xmm0 +; CHECK-NEXT: movl 96(%rsp), %edi +; CHECK-NEXT: movd %edi, %xmm2 +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; CHECK-NEXT: movl 136(%rsp), %r14d +; CHECK-NEXT: movd %r14d, %xmm0 +; CHECK-NEXT: movl 128(%rsp), %r12d +; CHECK-NEXT: movd %r12d, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: movl 144(%rsp), %r13d +; CHECK-NEXT: movl %r13d, 36(%rsp) +; CHECK-NEXT: movq %xmm1, 28(%rsp) +; CHECK-NEXT: movdqu %xmm2, 12(%rsp) ; CHECK-NEXT: callq use@PLT ; CHECK-NEXT: movl %r15d, %edi ; CHECK-NEXT: callq use@PLT -; CHECK-NEXT: movl %r14d, %edi -; CHECK-NEXT: callq use@PLT ; CHECK-NEXT: movl %ebp, %edi ; CHECK-NEXT: callq use@PLT ; CHECK-NEXT: movl %ebx, %edi ; CHECK-NEXT: callq use@PLT -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: movl %r12d, %edi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: movl %r13d, %edi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: addq $40, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 ; CHECK-NEXT: popq %r13