Skip to content

Commit

Permalink
[X86] Add avx512f only command lines to the vector add/sub saturation tests. NFC
Browse files Browse the repository at this point in the history

Gives us coverage of splitting the v32i16/v64i8 when we have
avx512f and not avx512bw.

Considering making v32i16/v64i8 legal types on avx512f, which
needs this test coverage.
  • Loading branch information
topperc committed Mar 14, 2020
1 parent b64afad commit 1ffc507
Show file tree
Hide file tree
Showing 4 changed files with 560 additions and 247 deletions.
252 changes: 169 additions & 83 deletions llvm/test/CodeGen/X86/sadd_sat_vec.ll
Expand Up @@ -4,7 +4,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW

declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
Expand Down Expand Up @@ -111,10 +112,19 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX2-NEXT: vpaddsb %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpaddsb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpaddsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
}
Expand Down Expand Up @@ -191,10 +201,19 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; AVX2-NEXT: vpaddsw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpaddsw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
}
Expand Down Expand Up @@ -551,15 +570,28 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $7, %xmm1, %xmm1
; AVX512-NEXT: vpmovb2m %xmm1, %k0
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512-NEXT: vpmovb2m %xmm0, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v16i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $7, %xmm1, %xmm1
; AVX512BW-NEXT: vpmovb2m %xmm1, %k0
; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovb2m %xmm0, %k1
; AVX512BW-NEXT: korw %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
}
Expand Down Expand Up @@ -639,19 +671,30 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v2i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX512F-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm3
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
ret <2 x i32> %z
}
Expand Down Expand Up @@ -729,19 +772,30 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX512F-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm3
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k0
; AVX512BW-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm2, %k2
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k2}
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %z
}
Expand Down Expand Up @@ -866,19 +920,30 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX2-NEXT: vblendvps %ymm0, %ymm3, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm2, %k0
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm2, %k2
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovdqa %ymm1, %ymm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm3 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX512F-NEXT: vblendvps %ymm2, %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpxor %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vblendvps %ymm0, %ymm3, %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v8i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm2, %k0
; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: vpcmpgtd %ymm1, %ymm2, %k2
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k2}
; AVX512BW-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
%z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
}
Expand Down Expand Up @@ -1221,19 +1286,29 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; AVX2-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm2, %k0
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm2, %k2
; AVX512-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; AVX512-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
; AVX512-NEXT: vmovdqa %xmm1, %xmm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddq %xmm1, %xmm0, %xmm2
; AVX512F-NEXT: vmovapd {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX512F-NEXT: vblendvpd %xmm2, {{.*}}(%rip), %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v2i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k0
; AVX512BW-NEXT: vpaddq %xmm1, %xmm0, %xmm1
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm2, %k2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; AVX512BW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k2}
; AVX512BW-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0
; AVX512BW-NEXT: retq
%z = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %z
}
Expand Down Expand Up @@ -1426,19 +1501,30 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm2, %k0
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: kxorw %k1, %k0, %k1
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm2, %k2
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovdqa %ymm1, %ymm0
; AVX512-NEXT: retq
; AVX512F-LABEL: v4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm3 = [9223372036854775807,9223372036854775807,9223372036854775807,9223372036854775807]
; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX512F-NEXT: vblendvpd %ymm2, %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpxor %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: vblendvpd %ymm0, %ymm3, %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: v4i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm2, %k0
; AVX512BW-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k1
; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm2, %k2
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX512BW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k2}
; AVX512BW-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
; AVX512BW-NEXT: retq
%z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
}
Expand Down

0 comments on commit 1ffc507

Please sign in to comment.