diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll index 9d573ef2a8fad..9a5f296f4516c 100644 --- a/llvm/test/CodeGen/X86/extractelement-load.ll +++ b/llvm/test/CodeGen/X86/extractelement-load.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSSE3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 @@ -7,12 +7,12 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32 @t(ptr %val) nounwind { -; X32-SSE2-LABEL: t: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3] -; X32-SSE2-NEXT: movd %xmm0, %eax -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: t: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3] +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: t: ; X64-SSSE3: # %bb.0: @@ -33,9 +33,9 @@ define i32 @t(ptr %val) nounwind { ; Case where extractelement of load ends up as undef. ; (Making sure this doesn't crash.) define i32 @t2(ptr %xp) { -; X32-SSE2-LABEL: t2: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: t2: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: retl ; ; X64-LABEL: t2: ; X64: # %bb.0: @@ -51,12 +51,12 @@ define i32 @t2(ptr %xp) { ; narrow load. define void @t3(ptr %a0) { -; X32-SSE2-LABEL: t3: -; X32-SSE2: # %bb.0: # %bb -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movups (%eax), %xmm0 -; X32-SSE2-NEXT: movhps %xmm0, (%eax) -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: t3: +; X86-SSE2: # %bb.0: # %bb +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movups (%eax), %xmm0 +; X86-SSE2-NEXT: movhps %xmm0, (%eax) +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: t3: ; X64-SSSE3: # %bb.0: # %bb @@ -81,14 +81,14 @@ bb: ; This is testing for an assertion - the extraction was assuming that the undef ; second shuffle operand was a post-bitcast type instead of a pre-bitcast type. define i64 @t4(ptr %a) { -; X32-SSE2-LABEL: t4: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movdqa (%eax), %xmm0 -; X32-SSE2-NEXT: movd %xmm0, %eax -; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X32-SSE2-NEXT: movd %xmm0, %edx -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: t4: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movdqa (%eax), %xmm0 +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: movd %xmm0, %edx +; X86-SSE2-NEXT: retl ; ; X64-LABEL: t4: ; X64: # %bb.0: @@ -103,13 +103,13 @@ define i64 @t4(ptr %a) { ; Don't extract from a volatile. define void @t5(ptr%a0, ptr%a1) { -; X32-SSE2-LABEL: t5: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: movaps (%ecx), %xmm0 -; X32-SSE2-NEXT: movhps %xmm0, (%eax) -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: t5: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movaps (%ecx), %xmm0 +; X86-SSE2-NEXT: movhps %xmm0, (%eax) +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: t5: ; X64-SSSE3: # %bb.0: @@ -130,24 +130,24 @@ define void @t5(ptr%a0, ptr%a1) { ; Check for multiuse. define float @t6(ptr%a0) { -; X32-SSE2-LABEL: t6: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %eax -; X32-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movaps (%eax), %xmm0 -; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X32-SSE2-NEXT: xorps %xmm1, %xmm1 -; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1 -; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; X32-SSE2-NEXT: andps %xmm1, %xmm2 -; X32-SSE2-NEXT: andnps %xmm0, %xmm1 -; X32-SSE2-NEXT: orps %xmm2, %xmm1 -; X32-SSE2-NEXT: movss %xmm1, (%esp) -; X32-SSE2-NEXT: flds (%esp) -; X32-SSE2-NEXT: popl %eax -; X32-SSE2-NEXT: .cfi_def_cfa_offset 4 -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: t6: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movaps (%eax), %xmm0 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: xorps %xmm1, %xmm1 +; X86-SSE2-NEXT: cmpeqss %xmm0, %xmm1 +; X86-SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: andps %xmm1, %xmm2 +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: orps %xmm2, %xmm1 +; X86-SSE2-NEXT: movss %xmm1, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: t6: ; X64-SSSE3: # %bb.0: @@ -184,20 +184,20 @@ define float @t6(ptr%a0) { } define void @PR43971(ptr%a0, ptr%a1) { -; X32-SSE2-LABEL: PR43971: -; X32-SSE2: # %bb.0: # %entry -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: movaps 16(%ecx), %xmm0 -; X32-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; X32-SSE2-NEXT: xorps %xmm1, %xmm1 -; X32-SSE2-NEXT: cmpltss %xmm0, %xmm1 -; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-SSE2-NEXT: andps %xmm1, %xmm2 -; X32-SSE2-NEXT: andnps %xmm0, %xmm1 -; X32-SSE2-NEXT: orps %xmm2, %xmm1 -; X32-SSE2-NEXT: movss %xmm1, (%eax) -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: PR43971: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movaps 16(%ecx), %xmm0 +; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X86-SSE2-NEXT: xorps %xmm1, %xmm1 +; X86-SSE2-NEXT: cmpltss %xmm0, %xmm1 +; X86-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: andps %xmm1, %xmm2 +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: orps %xmm2, %xmm1 +; X86-SSE2-NEXT: movss %xmm1, (%eax) +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: PR43971: ; X64-SSSE3: # %bb.0: # %entry @@ -231,22 +231,22 @@ entry: } define float @PR43971_1(ptr%a0) nounwind { -; X32-SSE2-LABEL: PR43971_1: -; X32-SSE2: # %bb.0: # %entry -; X32-SSE2-NEXT: pushl %eax -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movaps (%eax), %xmm0 -; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X32-SSE2-NEXT: xorps %xmm1, %xmm1 -; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1 -; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; X32-SSE2-NEXT: andps %xmm1, %xmm2 -; X32-SSE2-NEXT: andnps %xmm0, %xmm1 -; X32-SSE2-NEXT: orps %xmm2, %xmm1 -; X32-SSE2-NEXT: movss %xmm1, (%esp) -; X32-SSE2-NEXT: flds (%esp) -; X32-SSE2-NEXT: popl %eax -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: PR43971_1: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movaps (%eax), %xmm0 +; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: xorps %xmm1, %xmm1 +; X86-SSE2-NEXT: cmpeqss %xmm0, %xmm1 +; X86-SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X86-SSE2-NEXT: andps %xmm1, %xmm2 +; X86-SSE2-NEXT: andnps %xmm0, %xmm1 +; X86-SSE2-NEXT: orps %xmm2, %xmm1 +; X86-SSE2-NEXT: movss %xmm1, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: popl %eax +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: PR43971_1: ; X64-SSSE3: # %bb.0: # %entry @@ -285,15 +285,15 @@ entry: ; Test for bad extractions from a VBROADCAST_LOAD of the <2 x i16> non-uniform constant bitcast as <4 x i32>. define void @subextract_broadcast_load_constant(ptr nocapture %0, ptr nocapture %1, ptr nocapture %2) nounwind { -; X32-SSE2-LABEL: subextract_broadcast_load_constant: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-SSE2-NEXT: movl $-1583308898, (%edx) # imm = 0xA1A09F9E -; X32-SSE2-NEXT: movw $-24674, (%ecx) # imm = 0x9F9E -; X32-SSE2-NEXT: movw $-24160, (%eax) # imm = 0xA1A0 -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: subextract_broadcast_load_constant: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: movl $-1583308898, (%edx) # imm = 0xA1A09F9E +; X86-SSE2-NEXT: movw $-24674, (%ecx) # imm = 0x9F9E +; X86-SSE2-NEXT: movw $-24160, (%eax) # imm = 0xA1A0 +; X86-SSE2-NEXT: retl ; ; X64-LABEL: subextract_broadcast_load_constant: ; X64: # %bb.0: @@ -319,15 +319,15 @@ define void @subextract_broadcast_load_constant(ptr nocapture %0, ptr nocapture ; A scalar load is favored over a XMM->GPR register transfer in this example. define i32 @multi_use_load_scalarization(ptr %p) nounwind { -; X32-SSE2-LABEL: multi_use_load_scalarization: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: movl (%ecx), %eax -; X32-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; X32-SSE2-NEXT: psubd %xmm1, %xmm0 -; X32-SSE2-NEXT: movdqa %xmm0, (%ecx) -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: multi_use_load_scalarization: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movl (%ecx), %eax +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-SSE2-NEXT: psubd %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, (%ecx) +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: multi_use_load_scalarization: ; X64-SSSE3: # %bb.0: @@ -354,15 +354,15 @@ define i32 @multi_use_load_scalarization(ptr %p) nounwind { } define i32 @multi_use_volatile_load_scalarization(ptr %p) nounwind { -; X32-SSE2-LABEL: multi_use_volatile_load_scalarization: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: movdqu (%ecx), %xmm0 -; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; X32-SSE2-NEXT: movd %xmm0, %eax -; X32-SSE2-NEXT: psubd %xmm1, %xmm0 -; X32-SSE2-NEXT: movdqa %xmm0, (%ecx) -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: multi_use_volatile_load_scalarization: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movdqu (%ecx), %xmm0 +; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: psubd %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, (%ecx) +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: multi_use_volatile_load_scalarization: ; X64-SSSE3: # %bb.0: @@ -398,41 +398,41 @@ define i32 @multi_use_volatile_load_scalarization(ptr %p) nounwind { @zero = internal unnamed_addr global <8 x i32> zeroinitializer, align 32 define i32 @main() nounwind { -; X32-SSE2-LABEL: main: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %ebp -; X32-SSE2-NEXT: movl %esp, %ebp -; X32-SSE2-NEXT: pushl %esi -; X32-SSE2-NEXT: andl $-32, %esp -; X32-SSE2-NEXT: subl $64, %esp -; X32-SSE2-NEXT: movdqa zero, %xmm0 -; X32-SSE2-NEXT: movaps n1+16, %xmm1 -; X32-SSE2-NEXT: movaps n1, %xmm2 -; X32-SSE2-NEXT: movaps %xmm2, zero -; X32-SSE2-NEXT: movaps %xmm1, zero+16 -; X32-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2] -; X32-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) -; X32-SSE2-NEXT: movaps %xmm1, (%esp) -; X32-SSE2-NEXT: movdqa (%esp), %xmm1 -; X32-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2 -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-SSE2-NEXT: movd %xmm2, %eax -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-SSE2-NEXT: movd %xmm2, %ecx -; X32-SSE2-NEXT: xorl %edx, %edx -; X32-SSE2-NEXT: divl %ecx -; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X32-SSE2-NEXT: movd %xmm0, %eax -; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; X32-SSE2-NEXT: movd %xmm0, %esi -; X32-SSE2-NEXT: xorl %edx, %edx -; X32-SSE2-NEXT: divl %esi -; X32-SSE2-NEXT: addl %ecx, %eax -; X32-SSE2-NEXT: leal -4(%ebp), %esp -; X32-SSE2-NEXT: popl %esi -; X32-SSE2-NEXT: popl %ebp -; X32-SSE2-NEXT: retl +; X86-SSE2-LABEL: main: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: andl $-32, %esp +; X86-SSE2-NEXT: subl $64, %esp +; X86-SSE2-NEXT: movdqa zero, %xmm0 +; X86-SSE2-NEXT: movaps n1+16, %xmm1 +; X86-SSE2-NEXT: movaps n1, %xmm2 +; X86-SSE2-NEXT: movaps %xmm2, zero +; X86-SSE2-NEXT: movaps %xmm1, zero+16 +; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2] +; X86-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: movaps %xmm1, (%esp) +; X86-SSE2-NEXT: movdqa (%esp), %xmm1 +; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X86-SSE2-NEXT: movd %xmm2, %eax +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] +; X86-SSE2-NEXT: movd %xmm2, %ecx +; X86-SSE2-NEXT: xorl %edx, %edx +; X86-SSE2-NEXT: divl %ecx +; X86-SSE2-NEXT: movl %eax, %ecx +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; X86-SSE2-NEXT: movd %xmm0, %esi +; X86-SSE2-NEXT: xorl %edx, %edx +; X86-SSE2-NEXT: divl %esi +; X86-SSE2-NEXT: addl %ecx, %eax +; X86-SSE2-NEXT: leal -4(%ebp), %esp +; X86-SSE2-NEXT: popl %esi +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: retl ; ; X64-SSSE3-LABEL: main: ; X64-SSSE3: # %bb.0: