563 changes: 387 additions & 176 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll

Large diffs are not rendered by default.

434 changes: 296 additions & 138 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll

Large diffs are not rendered by default.

234 changes: 160 additions & 74 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Large diffs are not rendered by default.

917 changes: 624 additions & 293 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll

Large diffs are not rendered by default.

24 changes: 16 additions & 8 deletions llvm/test/CodeGen/X86/vector-shuffle-combining.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
Expand Down Expand Up @@ -2401,13 +2402,20 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: combine_unneeded_subvector1:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: combine_unneeded_subvector1:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: combine_unneeded_subvector1:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-FAST-PERLANE-NEXT: retq
%b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %c
Expand Down
79 changes: 53 additions & 26 deletions llvm/test/CodeGen/X86/vector-shuffle-v1.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefix=VL_BW_DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=VL_BW_DQ --check-prefix=VL_BW_DQ-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512dq,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=VL_BW_DQ --check-prefix=VL_BW_DQ-FAST-PERLANE

define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
; AVX512F-LABEL: shuf2i1_1_0:
Expand Down Expand Up @@ -495,30 +497,55 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2]
; AVX512VL-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; VL_BW_DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
; AVX512VL-FAST-ALL-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512VL-FAST-ALL: # %bb.0:
; AVX512VL-FAST-ALL-NEXT: kmovw %edi, %k1
; AVX512VL-FAST-ALL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-FAST-ALL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX512VL-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2]
; AVX512VL-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX512VL-FAST-ALL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512VL-FAST-ALL-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-FAST-ALL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-FAST-ALL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-FAST-ALL-NEXT: vzeroupper
; AVX512VL-FAST-ALL-NEXT: retq
;
; AVX512VL-FAST-PERLANE-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; AVX512VL-FAST-PERLANE: # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT: kmovw %edi, %k1
; AVX512VL-FAST-PERLANE-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-FAST-PERLANE-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX512VL-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX512VL-FAST-PERLANE-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX512VL-FAST-PERLANE-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512VL-FAST-PERLANE-NEXT: vptestmd %ymm1, %ymm1, %k1
; AVX512VL-FAST-PERLANE-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-FAST-PERLANE-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-FAST-PERLANE-NEXT: vzeroupper
; AVX512VL-FAST-PERLANE-NEXT: retq
;
; VL_BW_DQ-FAST-ALL-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ-FAST-ALL: # %bb.0:
; VL_BW_DQ-FAST-ALL-NEXT: kmovd %edi, %k0
; VL_BW_DQ-FAST-ALL-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; VL_BW_DQ-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; VL_BW_DQ-FAST-ALL-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-FAST-ALL-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-FAST-ALL-NEXT: vzeroupper
; VL_BW_DQ-FAST-ALL-NEXT: retq
;
; VL_BW_DQ-FAST-PERLANE-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
; VL_BW_DQ-FAST-PERLANE: # %bb.0:
; VL_BW_DQ-FAST-PERLANE-NEXT: kmovd %edi, %k0
; VL_BW_DQ-FAST-PERLANE-NEXT: vpmovm2d %k0, %ymm0
; VL_BW_DQ-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; VL_BW_DQ-FAST-PERLANE-NEXT: vpbroadcastq %xmm0, %ymm0
; VL_BW_DQ-FAST-PERLANE-NEXT: vpmovd2m %ymm0, %k0
; VL_BW_DQ-FAST-PERLANE-NEXT: vpmovm2w %k0, %xmm0
; VL_BW_DQ-FAST-PERLANE-NEXT: vzeroupper
; VL_BW_DQ-FAST-PERLANE-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1> %b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
ret <8 x i1> %c
Expand Down
287 changes: 194 additions & 93 deletions llvm/test/CodeGen/X86/vector-trunc-math.ll

Large diffs are not rendered by default.

108 changes: 72 additions & 36 deletions llvm/test/CodeGen/X86/vector-trunc-packus.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX

;
Expand Down Expand Up @@ -502,19 +506,32 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) {
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_packus_v4i64_v4i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc_packus_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
; AVX2-FAST-ALL-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_packus_v4i64_v4i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v4i64_v4i32:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -923,25 +940,44 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-wid
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_packus_v8i64_v8i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-FAST-NEXT: vpand %ymm1, %ymm3, %ymm1
; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
; AVX2-FAST-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc_packus_v8i64_v8i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-FAST-ALL-NEXT: vpand %ymm1, %ymm3, %ymm1
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
; AVX2-FAST-ALL-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_packus_v8i64_v8i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-FAST-PERLANE-NEXT: vpand %ymm1, %ymm3, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2
; AVX2-FAST-PERLANE-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc_packus_v8i64_v8i32:
; AVX512: # %bb.0:
Expand Down
108 changes: 72 additions & 36 deletions llvm/test/CodeGen/X86/vector-trunc-ssat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX

;
Expand Down Expand Up @@ -508,19 +512,32 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_ssat_v4i64_v4i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc_ssat_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_ssat_v4i64_v4i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -945,25 +962,44 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32(<8 x i64>* %p0) "min-legal-vector-width
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_ssat_v8i64_v8i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc_ssat_v8i64_v8i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_ssat_v8i64_v8i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
; AVX512: # %bb.0:
Expand Down
115 changes: 75 additions & 40 deletions llvm/test/CodeGen/X86/vector-trunc-usat.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX

;
Expand Down Expand Up @@ -349,19 +353,32 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v4i64_v4i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-NEXT: vpxor %ymm1, %ymm0, %ymm1
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,429496729]
; AVX2-FAST-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc_usat_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-ALL-NEXT: vpxor %ymm1, %ymm0, %ymm1
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,429496729]
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_usat_v4i64_v4i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-PERLANE-NEXT: vpxor %ymm1, %ymm0, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vmovapd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,429496729]
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i64_v4i32:
; AVX512F: # %bb.0:
Expand Down Expand Up @@ -645,24 +662,42 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(<8 x i64>* %p0) {
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v8i64_v8i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-FAST-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-FAST-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-FAST-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc_usat_v8i64_v8i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_usat_v8i64_v8i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc_usat_v8i64_v8i32:
; AVX512: # %bb.0:
Expand Down
96 changes: 64 additions & 32 deletions llvm/test/CodeGen/X86/vector-trunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL

define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32:
Expand All @@ -32,13 +36,20 @@ define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc8i64_8i32:
; AVX2-FAST: # %bb.0: # %entry
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i32:
; AVX512: # %bb.0: # %entry
Expand Down Expand Up @@ -71,13 +82,20 @@ define <8 x i32> @trunc8i64_8i32_ashr(<8 x i64> %a) {
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc8i64_8i32_ashr:
; AVX2-FAST: # %bb.0: # %entry
; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm2 = [1,3,5,7]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_ashr:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} xmm2 = [1,3,5,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_ashr:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i32_ashr:
; AVX512: # %bb.0: # %entry
Expand Down Expand Up @@ -112,13 +130,20 @@ define <8 x i32> @trunc8i64_8i32_lshr(<8 x i64> %a) {
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc8i64_8i32_lshr:
; AVX2-FAST: # %bb.0: # %entry
; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm2 = [1,3,5,7]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_lshr:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} xmm2 = [1,3,5,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_lshr:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i32_lshr:
; AVX512: # %bb.0: # %entry
Expand Down Expand Up @@ -1354,13 +1379,20 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc2x4i64_8i32:
; AVX2-FAST: # %bb.0: # %entry
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i32:
; AVX512F: # %bb.0: # %entry
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/vector-zext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i8_to_8i16:
Expand Down