From 57923617181b0181e8d5c2f2e940a94a82737c7c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 22 Feb 2020 22:40:13 -0800 Subject: [PATCH] [X86] Add sse2 command lines to sse-intrinsics-fast-isel.ll. The extra available vector types on sse2 cause us to produce different code. --- .../CodeGen/X86/sse-intrinsics-fast-isel.ll | 143 ++++++++++++------ 1 file changed, 95 insertions(+), 48 deletions(-) diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 9f51a94051330..85400656e2e54 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,X86-SSE1 +; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE,X86-SSE2 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 -; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown 
-mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,X64-SSE1 +; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE,X64-SSE2 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 ; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 @@ -72,10 +74,17 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { } define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind { -; SSE-LABEL: test_mm_andnot_ps: -; SSE: # %bb.0: -; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1] -; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-SSE1-LABEL: test_mm_andnot_ps: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1] +; X86-SSE1-NEXT: retl # encoding: [0xc3] +; +; X86-SSE2-LABEL: test_mm_andnot_ps: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2] +; X86-SSE2-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1] +; X86-SSE2-NEXT: retl # encoding: [0xc3] ; ; AVX1-LABEL: test_mm_andnot_ps: ; AVX1: # %bb.0: @@ -89,6 +98,18 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1] ; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; X64-SSE1-LABEL: test_mm_andnot_ps: +; X64-SSE1: # %bb.0: +; X64-SSE1-NEXT: andnps 
%xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1] +; X64-SSE1-NEXT: retq # encoding: [0xc3] +; +; X64-SSE2-LABEL: test_mm_andnot_ps: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2] +; X64-SSE2-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1] +; X64-SSE2-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <4 x float> %a0 to <4 x i32> %arg1 = bitcast <4 x float> %a1 to <4 x i32> %not = xor <4 x i32> %arg0, @@ -2727,21 +2748,27 @@ define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) { } define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { -; X86-SSE-LABEL: test_mm_storeh_pi: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] -; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] -; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] -; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] -; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] -; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] -; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] -; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] -; X86-SSE-NEXT: retl # encoding: [0xc3] +; X86-SSE1-LABEL: test_mm_storeh_pi: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp # encoding: [0x55] +; X86-SSE1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-SSE1-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] +; X86-SSE1-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] +; X86-SSE1-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] +; X86-SSE1-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), 
%ecx # encoding: [0x8b,0x4c,0x24,0x08] +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] +; X86-SSE1-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] +; X86-SSE1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] +; X86-SSE1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-SSE1-NEXT: popl %ebp # encoding: [0x5d] +; X86-SSE1-NEXT: retl # encoding: [0xc3] +; +; X86-SSE2-LABEL: test_mm_storeh_pi: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE2-NEXT: movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00] +; X86-SSE2-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_storeh_pi: ; X86-AVX1: # %bb.0: @@ -2755,12 +2782,20 @@ define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { ; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; -; X64-SSE-LABEL: test_mm_storeh_pi: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] -; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] -; X64-SSE-NEXT: retq # encoding: [0xc3] +; X64-SSE1-LABEL: test_mm_storeh_pi: +; X64-SSE1: # %bb.0: +; X64-SSE1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] +; X64-SSE1-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] +; X64-SSE1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE1-NEXT: retq # encoding: [0xc3] +; +; X64-SSE2-LABEL: test_mm_storeh_pi: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pshufd $78, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x4e] +; X64-SSE2-NEXT: # xmm0 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE2-NEXT: retq # encoding: [0xc3] ; ; 
X64-AVX1-LABEL: test_mm_storeh_pi: ; X64-AVX1: # %bb.0: @@ -2820,21 +2855,27 @@ define void @test_mm_storeh_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { } define void @test_mm_storel_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { -; X86-SSE-LABEL: test_mm_storel_pi: -; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] -; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] -; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] -; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] -; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] -; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] -; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] -; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] -; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] -; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] -; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] -; X86-SSE-NEXT: retl # encoding: [0xc3] +; X86-SSE1-LABEL: test_mm_storel_pi: +; X86-SSE1: # %bb.0: +; X86-SSE1-NEXT: pushl %ebp # encoding: [0x55] +; X86-SSE1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-SSE1-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] +; X86-SSE1-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] +; X86-SSE1-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] +; X86-SSE1-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] +; X86-SSE1-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] +; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] +; X86-SSE1-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] +; X86-SSE1-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] +; X86-SSE1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-SSE1-NEXT: popl %ebp # encoding: [0x5d] +; X86-SSE1-NEXT: retl # encoding: [0xc3] +; +; X86-SSE2-LABEL: test_mm_storel_pi: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE2-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] +; X86-SSE2-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_storel_pi: ; X86-AVX1: # %bb.0: @@ -2848,12 +2889,18 @@ define void @test_mm_storel_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { ; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; -; X64-SSE-LABEL: test_mm_storel_pi: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] -; X64-SSE-NEXT: retq # encoding: [0xc3] +; X64-SSE1-LABEL: test_mm_storel_pi: +; X64-SSE1: # %bb.0: +; X64-SSE1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] +; X64-SSE1-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] +; X64-SSE1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE1-NEXT: retq # encoding: [0xc3] +; +; X64-SSE2-LABEL: test_mm_storel_pi: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE2-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_storel_pi: ; X64-AVX1: # %bb.0: