Skip to content

Commit

Permalink
[X86][SSE] Add SSE2 extract-concat tests
Browse files Browse the repository at this point in the history
Check pre-SSE41 codegen where we have fewer PEXTR*/PINSR* instructions
  • Loading branch information
RKSimon committed Apr 23, 2020
1 parent 1e2772c commit 757c7c2
Showing 1 changed file with 56 additions and 40 deletions.
96 changes: 56 additions & 40 deletions llvm/test/CodeGen/X86/extract-concat.ll
@@ -1,10 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512F

define void @foo(<4 x float> %in, <4 x i8>* %out) {
; SSE2-LABEL: foo:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00
; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: pinsrw $1, %eax, %xmm0
; SSE2-NEXT: movd %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: foo:
; SSE42: # %bb.0:
; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
Expand Down Expand Up @@ -39,22 +55,22 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
}

define <16 x i64> @catcat(<4 x i64> %x) {
; SSE42-LABEL: catcat:
; SSE42: # %bb.0:
; SSE42-NEXT: movq %rdi, %rax
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE42-NEXT: movdqa %xmm1, 112(%rdi)
; SSE42-NEXT: movdqa %xmm1, 96(%rdi)
; SSE42-NEXT: movdqa %xmm3, 80(%rdi)
; SSE42-NEXT: movdqa %xmm3, 64(%rdi)
; SSE42-NEXT: movdqa %xmm0, 48(%rdi)
; SSE42-NEXT: movdqa %xmm0, 32(%rdi)
; SSE42-NEXT: movdqa %xmm2, 16(%rdi)
; SSE42-NEXT: movdqa %xmm2, (%rdi)
; SSE42-NEXT: retq
; SSE-LABEL: catcat:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-NEXT: movdqa %xmm1, 112(%rdi)
; SSE-NEXT: movdqa %xmm1, 96(%rdi)
; SSE-NEXT: movdqa %xmm3, 80(%rdi)
; SSE-NEXT: movdqa %xmm3, 64(%rdi)
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
; SSE-NEXT: movdqa %xmm2, 16(%rdi)
; SSE-NEXT: movdqa %xmm2, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: catcat:
; AVX1: # %bb.0:
Expand Down Expand Up @@ -93,24 +109,24 @@ define <16 x i64> @catcat(<4 x i64> %x) {
}

define <16 x i64> @load_catcat(<4 x i64>* %p) {
; SSE42-LABEL: load_catcat:
; SSE42: # %bb.0:
; SSE42-NEXT: movq %rdi, %rax
; SSE42-NEXT: movdqa (%rsi), %xmm0
; SSE42-NEXT: movdqa 16(%rsi), %xmm1
; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE42-NEXT: movdqa %xmm1, 112(%rdi)
; SSE42-NEXT: movdqa %xmm1, 96(%rdi)
; SSE42-NEXT: movdqa %xmm3, 80(%rdi)
; SSE42-NEXT: movdqa %xmm3, 64(%rdi)
; SSE42-NEXT: movdqa %xmm0, 48(%rdi)
; SSE42-NEXT: movdqa %xmm0, 32(%rdi)
; SSE42-NEXT: movdqa %xmm2, 16(%rdi)
; SSE42-NEXT: movdqa %xmm2, (%rdi)
; SSE42-NEXT: retq
; SSE-LABEL: load_catcat:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movdqa (%rsi), %xmm0
; SSE-NEXT: movdqa 16(%rsi), %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-NEXT: movdqa %xmm1, 112(%rdi)
; SSE-NEXT: movdqa %xmm1, 96(%rdi)
; SSE-NEXT: movdqa %xmm3, 80(%rdi)
; SSE-NEXT: movdqa %xmm3, 64(%rdi)
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
; SSE-NEXT: movdqa %xmm2, 16(%rdi)
; SSE-NEXT: movdqa %xmm2, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: load_catcat:
; AVX1: # %bb.0:
Expand Down Expand Up @@ -147,11 +163,11 @@ define <16 x i64> @load_catcat(<4 x i64>* %p) {
; the source ops are not an even multiple of the size of the result.

define <4 x i32> @cat_ext_straddle(<6 x i32>* %px, <6 x i32>* %py) {
; SSE42-LABEL: cat_ext_straddle:
; SSE42: # %bb.0:
; SSE42-NEXT: movaps 16(%rdi), %xmm0
; SSE42-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; SSE42-NEXT: retq
; SSE-LABEL: cat_ext_straddle:
; SSE: # %bb.0:
; SSE-NEXT: movaps 16(%rdi), %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; SSE-NEXT: retq
;
; AVX-LABEL: cat_ext_straddle:
; AVX: # %bb.0:
Expand Down

0 comments on commit 757c7c2

Please sign in to comment.