diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index ff90ba39d9621..c10e35c2cf0d9 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64
 
 define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
 ; CHECK-LABEL: freeze_insert_subvector:
@@ -58,3 +58,205 @@ define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
   %z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32>
   ret <4 x float> %z
 }
+
+define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_from_wider_elt:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_from_wider_elt:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vmovlps %xmm0, (%rsi)
+; X64-NEXT: retq
+  %i0 = load <4 x i16>, ptr %origin
+  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
+  %i2 = freeze <8 x i8> %i1
+  %i3 = bitcast <8 x i8> %i2 to i64
+  store i64 %i3, ptr %dst
+  ret void
+}
+define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovsd %xmm0, (%ecx)
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: movq %rax, (%rsi)
+; X64-NEXT: vmovq %xmm0, (%rdx)
+; X64-NEXT: retq
+  %i0 = load <4 x i16>, ptr %origin
+  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
+  store <8 x i8> %i1, ptr %escape
+  %i2 = freeze <8 x i8> %i1
+  %i3 = bitcast <8 x i8> %i2 to i64
+  store i64 %i3, ptr %dst
+  ret void
+}
+
+define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_to_wider_elt:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_to_wider_elt:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vmovlps %xmm0, (%rsi)
+; X64-NEXT: retq
+  %i0 = load <8 x i8>, ptr %origin
+  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
+  %i2 = freeze <4 x i16> %i1
+  %i3 = bitcast <4 x i16> %i2 to i64
+  store i64 %i3, ptr %dst
+  ret void
+}
+define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
+; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT: vmovsd %xmm0, (%ecx)
+; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: movq %rax, (%rsi)
+; X64-NEXT: vmovq %xmm0, (%rdx)
+; X64-NEXT: retq
+  %i0 = load <8 x i8>, ptr %origin
+  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
+  store <4 x i16> %i1, ptr %escape
+  %i2 = freeze <4 x i16> %i1
+  %i3 = bitcast <4 x i16> %i2 to i64
+  store i64 %i3, ptr %dst
+  ret void
+}
+
+define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
+; X86-LABEL: freeze_extractelement:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: vmovdqa (%edx), %xmm0
+; X86-NEXT: vpand (%ecx), %xmm0, %xmm0
+; X86-NEXT: vpextrb $6, %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_extractelement:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa (%rdi), %xmm0
+; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
+; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
+; X64-NEXT: retq
+  %i0 = load <16 x i8>, ptr %origin0
+  %i1 = load <16 x i8>, ptr %origin1
+  %i2 = and <16 x i8> %i0, %i1
+  %i3 = freeze <16 x i8> %i2
+  %i4 = extractelement <16 x i8> %i3, i64 6
+  store i8 %i4, ptr %dst
+  ret void
+}
+define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
+; X86-LABEL: freeze_extractelement_escape:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: vmovdqa (%esi), %xmm0
+; X86-NEXT: vpand (%edx), %xmm0, %xmm0
+; X86-NEXT: vmovdqa %xmm0, (%ecx)
+; X86-NEXT: vpextrb $6, %xmm0, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_extractelement_escape:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa (%rdi), %xmm0
+; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
+; X64-NEXT: vmovdqa %xmm0, (%rcx)
+; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
+; X64-NEXT: retq
+  %i0 = load <16 x i8>, ptr %origin0
+  %i1 = load <16 x i8>, ptr %origin1
+  %i2 = and <16 x i8> %i0, %i1
+  %i3 = freeze <16 x i8> %i2
+  store <16 x i8> %i3, ptr %escape
+  %i4 = extractelement <16 x i8> %i3, i64 6
+  store i8 %i4, ptr %dst
+  ret void
+}
+define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
+; X86-LABEL: freeze_extractelement_extra_use:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: andl $15, %eax
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: andl $15, %ecx
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl 12(%ebp), %esi
+; X86-NEXT: movl 8(%ebp), %edi
+; X86-NEXT: vmovaps (%edi), %xmm0
+; X86-NEXT: vandps (%esi), %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%esp)
+; X86-NEXT: movzbl (%esp,%ecx), %ecx
+; X86-NEXT: cmpb (%esp,%eax), %cl
+; X86-NEXT: sete (%edx)
+; X86-NEXT: leal -8(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: freeze_extractelement_extra_use:
+; X64: # %bb.0:
+; X64-NEXT: andl $15, %ecx
+; X64-NEXT: andl $15, %edx
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vandps (%rsi), %xmm0, %xmm0
+; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movzbl -24(%rsp,%rdx), %eax
+; X64-NEXT: cmpb -24(%rsp,%rcx), %al
+; X64-NEXT: sete (%r8)
+; X64-NEXT: retq
+  %i0 = load <16 x i8>, ptr %origin0
+  %i1 = load <16 x i8>, ptr %origin1
+  %i2 = and <16 x i8> %i0, %i1
+  %i3 = freeze <16 x i8> %i2
+  %i4 = extractelement <16 x i8> %i3, i64 %idx0
+  %i5 = extractelement <16 x i8> %i3, i64 %idx1
+  %i6 = icmp eq i8 %i4, %i5
+  store i1 %i6, ptr %dst
+  ret void
+}