diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index 1a7599055b5772..bc2edf43be38de 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -1,17 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,SKX define i1 @allones_v16i8_sign(<16 x i8> %arg) { -; SSE2-LABEL: allones_v16i8_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i8_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allones_v16i8_sign: ; AVX: # %bb.0: @@ -19,20 +20,6 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) { ; AVX-NEXT: cmpw $-1, %ax ; AVX-NEXT: sete %al ; AVX-NEXT: retq -; -; KNL-LABEL: allones_v16i8_sign: -; KNL: # %bb.0: -; KNL-NEXT: vpmovmskb %xmm0, %eax -; KNL-NEXT: cmpw $-1, %ax -; KNL-NEXT: sete %al -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v16i8_sign: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %xmm0, %eax -; SKX-NEXT: cmpw $-1, %ax -; SKX-NEXT: sete %al -; SKX-NEXT: retq %tmp = icmp slt <16 x i8> %arg, zeroinitializer %tmp1 = bitcast <16 x i1> %tmp to i16 %tmp2 = icmp eq i16 %tmp1, -1 @@ -40,12 +27,12 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) { } define i1 @allzeros_v16i8_sign(<16 x i8> %arg) { -; SSE2-LABEL: allzeros_v16i8_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i8_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allzeros_v16i8_sign: ; AVX: # %bb.0: @@ -53,20 +40,6 @@ define i1 @allzeros_v16i8_sign(<16 x i8> %arg) { ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: sete %al ; AVX-NEXT: retq -; -; KNL-LABEL: allzeros_v16i8_sign: -; KNL: # %bb.0: -; KNL-NEXT: vpmovmskb %xmm0, %eax -; KNL-NEXT: testw %ax, %ax -; KNL-NEXT: sete %al -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v16i8_sign: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %xmm0, %eax -; SKX-NEXT: testw %ax, %ax -; SKX-NEXT: sete %al -; SKX-NEXT: retq %tmp = icmp slt <16 x i8> %arg, zeroinitializer %tmp1 = bitcast <16 x i1> %tmp to i16 %tmp2 = icmp eq i16 %tmp1, 0 @@ -74,15 +47,15 @@ define i1 @allzeros_v16i8_sign(<16 x i8> %arg) { } define i1 @allones_v32i8_sign(<32 x i8> %arg) { -; SSE2-LABEL: allones_v32i8_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: cmpl $-1, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v32i8_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: cmpl $-1, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v32i8_sign: ; AVX1: # %bb.0: @@ -104,21 +77,13 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: allones_v32i8_sign: -; KNL: # %bb.0: -; KNL-NEXT: vpmovmskb %ymm0, %eax -; KNL-NEXT: cmpl $-1, %eax -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v32i8_sign: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %ymm0, %eax -; SKX-NEXT: cmpl $-1, %eax -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: allones_v32i8_sign: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovmskb %ymm0, %eax +; AVX512-NEXT: cmpl $-1, %eax +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = icmp slt <32 x i8> %arg, zeroinitializer %tmp1 = bitcast <32 x i1> %tmp to i32 %tmp2 = icmp eq i32 %tmp1, -1 @@ -126,14 +91,14 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) { } define i1 @allzeros_v32i8_sign(<32 x i8> %arg) { -; SSE2-LABEL: allzeros_v32i8_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v32i8_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i8_sign: ; AVX1: # %bb.0: @@ -154,21 +119,13 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: allzeros_v32i8_sign: -; KNL: # %bb.0: -; KNL-NEXT: vpmovmskb %ymm0, %eax -; KNL-NEXT: testl %eax, %eax -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v32i8_sign: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %ymm0, %eax -; SKX-NEXT: testl %eax, %eax -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: allzeros_v32i8_sign: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovmskb %ymm0, %eax +; AVX512-NEXT: testl %eax, %eax +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = icmp slt <32 x i8> %arg, zeroinitializer %tmp1 = bitcast <32 x i1> %tmp to i32 %tmp2 = icmp eq i32 %tmp1, 0 @@ -176,21 +133,21 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) { } define i1 @allones_v64i8_sign(<64 x i8> %arg) { -; SSE2-LABEL: allones_v64i8_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: pmovmskb %xmm3, %edx -; SSE2-NEXT: shll $16, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: shlq $32, %rdx -; SSE2-NEXT: orq %rcx, %rdx -; SSE2-NEXT: cmpq $-1, %rdx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v64i8_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: pmovmskb %xmm3, %edx +; SSE-NEXT: shll $16, %edx +; SSE-NEXT: orl %eax, %edx +; SSE-NEXT: shlq $32, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmpq $-1, %rdx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v64i8_sign: ; AVX1: # %bb.0: @@ -248,20 +205,20 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) { } define i1 @allzeros_v64i8_sign(<64 x i8> %arg) { -; SSE2-LABEL: allzeros_v64i8_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: pmovmskb %xmm3, %edx -; SSE2-NEXT: shll $16, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: shlq $32, %rdx -; SSE2-NEXT: orq %rcx, %rdx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v64i8_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: pmovmskb %xmm3, %edx +; SSE-NEXT: shll $16, %edx +; SSE-NEXT: orl %eax, %edx +; SSE-NEXT: shlq $32, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v64i8_sign: ; AVX1: # %bb.0: @@ -316,21 +273,21 @@ define i1 @allzeros_v64i8_sign(<64 x i8> %arg) { } define i1 @allones_v8i16_sign(<8 x i16> %arg) { -; SSE2-LABEL: allones_v8i16_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v8i16_sign: -; AVX: # %bb.0: -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpb $-1, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v8i16_sign: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v8i16_sign: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: cmpb $-1, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v8i16_sign: ; KNL: # %bb.0: @@ -357,19 +314,19 @@ define i1 @allones_v8i16_sign(<8 x i16> %arg) { } define i1 @allzeros_v8i16_sign(<8 x i16> %arg) { -; SSE2-LABEL: allzeros_v8i16_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v8i16_sign: -; AVX: # %bb.0: -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v8i16_sign: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v8i16_sign: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v8i16_sign: ; KNL: # %bb.0: @@ -396,13 +353,13 @@ define i1 @allzeros_v8i16_sign(<8 x i16> %arg) { } define i1 @allones_v16i16_sign(<16 x i16> %arg) { -; SSE2-LABEL: allones_v16i16_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i16_sign: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v16i16_sign: ; AVX1: # %bb.0: @@ -449,13 +406,13 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) { } define i1 @allzeros_v16i16_sign(<16 x i16> %arg) { -; SSE2-LABEL: allzeros_v16i16_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i16_sign: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v16i16_sign: ; AVX1: # %bb.0: @@ -500,17 +457,17 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) { } define i1 @allones_v32i16_sign(<32 x i16> %arg) { -; SSE2-LABEL: allones_v32i16_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: packsswb %xmm3, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: cmpl $-1, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v32i16_sign: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: cmpl $-1, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v32i16_sign: ; AVX1: # %bb.0: @@ -569,16 +526,16 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) { } define i1 @allzeros_v32i16_sign(<32 x i16> %arg) { -; SSE2-LABEL: allzeros_v32i16_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: packsswb %xmm3, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v32i16_sign: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i16_sign: ; AVX1: # %bb.0: @@ -635,12 +592,12 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) { } define i1 @allones_v4i32_sign(<4 x i32> %arg) { -; SSE2-LABEL: allones_v4i32_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpb $15, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v4i32_sign: +; SSE: # %bb.0: +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: cmpb $15, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allones_v4i32_sign: ; AVX: # %bb.0: @@ -648,20 +605,6 @@ define i1 @allones_v4i32_sign(<4 x i32> %arg) { ; AVX-NEXT: cmpb $15, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq -; -; KNL-LABEL: allones_v4i32_sign: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskps %xmm0, %eax -; KNL-NEXT: cmpb $15, %al -; KNL-NEXT: sete %al -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v4i32_sign: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %xmm0, %eax -; SKX-NEXT: cmpb $15, %al -; SKX-NEXT: sete %al -; SKX-NEXT: retq %tmp = icmp slt <4 x i32> %arg, zeroinitializer %tmp1 = bitcast <4 x i1> %tmp to i4 %tmp2 = icmp eq i4 %tmp1, -1 @@ -669,12 +612,12 @@ define i1 @allones_v4i32_sign(<4 x i32> %arg) { } define i1 @allzeros_v4i32_sign(<4 x i32> %arg) { -; SSE2-LABEL: allzeros_v4i32_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v4i32_sign: +; SSE: # %bb.0: +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allzeros_v4i32_sign: ; AVX: # %bb.0: @@ -682,20 +625,6 @@ define i1 @allzeros_v4i32_sign(<4 x i32> %arg) { ; AVX-NEXT: testb %al, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq -; -; KNL-LABEL: allzeros_v4i32_sign: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskps %xmm0, %eax -; KNL-NEXT: testb %al, %al -; KNL-NEXT: sete %al -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v4i32_sign: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %xmm0, %eax -; SKX-NEXT: testb %al, %al -; SKX-NEXT: sete %al -; SKX-NEXT: retq %tmp = icmp slt <4 x i32> %arg, zeroinitializer %tmp1 = bitcast <4 x i1> %tmp to i4 %tmp2 = icmp eq i4 %tmp1, 0 @@ -703,14 +632,14 @@ define i1 @allzeros_v4i32_sign(<4 x i32> %arg) { } define i1 @allones_v8i32_sign(<8 x i32> %arg) { -; SSE2-LABEL: allones_v8i32_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v8i32_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allones_v8i32_sign: ; AVX: # %bb.0: @@ -719,22 +648,6 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) { ; AVX-NEXT: sete %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq -; -; KNL-LABEL: allones_v8i32_sign: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskps %ymm0, %eax -; KNL-NEXT: cmpb $-1, %al -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v8i32_sign: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %ymm0, %eax -; SKX-NEXT: cmpb $-1, %al -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq %tmp = icmp slt <8 x i32> %arg, zeroinitializer %tmp1 = bitcast <8 x i1> %tmp to i8 %tmp2 = icmp eq i8 %tmp1, -1 @@ -742,13 +655,13 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) { } define i1 @allzeros_v8i32_sign(<8 x i32> %arg) { -; SSE2-LABEL: allzeros_v8i32_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i32_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allzeros_v8i32_sign: ; AVX: # %bb.0: @@ -757,22 +670,6 @@ define i1 @allzeros_v8i32_sign(<8 x i32> %arg) { ; AVX-NEXT: sete %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq -; -; KNL-LABEL: allzeros_v8i32_sign: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskps %ymm0, %eax -; KNL-NEXT: testb %al, %al -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v8i32_sign: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %ymm0, %eax -; SKX-NEXT: testb %al, %al -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq %tmp = icmp slt <8 x i32> %arg, zeroinitializer %tmp1 = bitcast <8 x i1> %tmp to i8 %tmp2 = icmp eq i8 %tmp1, 0 @@ -780,15 +677,15 @@ define i1 @allzeros_v8i32_sign(<8 x i32> %arg) { } define i1 @allones_v16i32_sign(<16 x i32> %arg) { -; SSE2-LABEL: allones_v16i32_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i32_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v16i32_sign: ; AVX1: # %bb.0: @@ -841,15 +738,15 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) { } define i1 @allzeros_v16i32_sign(<16 x i32> %arg) { -; SSE2-LABEL: allzeros_v16i32_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i32_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v16i32_sign: ; AVX1: # %bb.0: @@ -900,13 +797,13 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) { } define i1 @allones_v4i64_sign(<4 x i64> %arg) { -; SSE2-LABEL: allones_v4i64_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpb $15, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v4i64_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: cmpb $15, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allones_v4i64_sign: ; AVX: # %bb.0: @@ -915,22 +812,6 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) { ; AVX-NEXT: sete %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq -; -; KNL-LABEL: allones_v4i64_sign: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskpd %ymm0, %eax -; KNL-NEXT: cmpb $15, %al -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v4i64_sign: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %ymm0, %eax -; SKX-NEXT: cmpb $15, %al -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq %tmp = icmp slt <4 x i64> %arg, zeroinitializer %tmp1 = bitcast <4 x i1> %tmp to i4 %tmp2 = icmp eq i4 %tmp1, -1 @@ -938,13 +819,13 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) { } define i1 @allzeros_v4i64_sign(<4 x i64> %arg) { -; SSE2-LABEL: allzeros_v4i64_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v4i64_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX-LABEL: allzeros_v4i64_sign: ; AVX: # %bb.0: @@ -953,22 +834,6 @@ define i1 @allzeros_v4i64_sign(<4 x i64> %arg) { ; AVX-NEXT: sete %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq -; -; KNL-LABEL: allzeros_v4i64_sign: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskpd %ymm0, %eax -; KNL-NEXT: testb %al, %al -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v4i64_sign: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %ymm0, %eax -; SKX-NEXT: testb %al, %al -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq %tmp = icmp slt <4 x i64> %arg, zeroinitializer %tmp1 = bitcast <4 x i1> %tmp to i4 %tmp2 = icmp eq i4 %tmp1, 0 @@ -976,16 +841,16 @@ define i1 @allzeros_v4i64_sign(<4 x i64> %arg) { } define i1 @allones_v8i64_sign(<8 x i64> %arg) { -; SSE2-LABEL: allones_v8i64_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v8i64_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v8i64_sign: ; AVX1: # %bb.0: @@ -1036,15 +901,15 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) { } define i1 @allzeros_v8i64_sign(<8 x i64> %arg) { -; SSE2-LABEL: allzeros_v8i64_sign: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i64_sign: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v8i64_sign: ; AVX1: # %bb.0: @@ -1095,21 +960,21 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) { } define i1 @allones_v16i8_and1(<16 x i8> %arg) { -; SSE2-LABEL: allones_v16i8_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $7, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v16i8_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpw $-1, %ax -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v16i8_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v16i8_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: cmpw $-1, %ax +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v16i8_and1: ; KNL: # %bb.0: @@ -1133,23 +998,23 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) { } define i1 @allzeros_v16i8_not(<16 x i8> %a0) { -; SSE2-LABEL: allzeros_v16i8_not: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: setne %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v16i8_not: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpw $-1, %ax -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v16i8_not: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v16i8_not: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: cmpw $-1, %ax +; AVX1OR2-NEXT: setne %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v16i8_not: ; KNL: # %bb.0: @@ -1184,14 +1049,23 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) { ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; -; AVX-LABEL: allzeros_v2i64_not: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; SSE41-LABEL: allzeros_v2i64_not: +; SSE41: # %bb.0: +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: movmskpd %xmm1, %eax +; SSE41-NEXT: cmpb $3, %al +; SSE41-NEXT: setne %al +; SSE41-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v2i64_not: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: setne %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v2i64_not: ; KNL: # %bb.0: @@ -1218,17 +1092,17 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) { } define i1 @allzeros_v8i32_not(<8 x i32> %a0) { -; SSE2-LABEL: allzeros_v8i32_not: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: setne %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i32_not: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: setne %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v8i32_not: ; AVX1: # %bb.0: @@ -1301,6 +1175,22 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) { ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; +; SSE41-LABEL: allzeros_v8i64_not: +; SSE41: # %bb.0: +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm2 +; SSE41-NEXT: packssdw %xmm3, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 +; SSE41-NEXT: packssdw %xmm1, %xmm0 +; SSE41-NEXT: packssdw %xmm2, %xmm0 +; SSE41-NEXT: packsswb %xmm0, %xmm0 +; SSE41-NEXT: pmovmskb %xmm0, %eax +; SSE41-NEXT: cmpb $-1, %al +; SSE41-NEXT: setne %al +; SSE41-NEXT: retq +; ; AVX1-LABEL: allzeros_v8i64_not: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 @@ -1354,21 +1244,21 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) { } define i1 @allzeros_v16i8_and1(<16 x i8> %arg) { -; SSE2-LABEL: allzeros_v16i8_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $7, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v16i8_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: testw %ax, %ax -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v16i8_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v16i8_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: testw %ax, %ax +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v16i8_and1: ; KNL: # %bb.0: @@ -1392,17 +1282,17 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) { } define i1 @allones_v32i8_and1(<32 x i8> %arg) { -; SSE2-LABEL: allones_v32i8_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $7, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $7, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: cmpl $-1, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v32i8_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $7, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: cmpl $-1, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v32i8_and1: ; AVX1: # %bb.0: @@ -1451,16 +1341,16 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) { } define i1 @allzeros_v32i8_and1(<32 x i8> %arg) { -; SSE2-LABEL: allzeros_v32i8_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $7, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $7, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v32i8_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $7, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i8_and1: ; AVX1: # %bb.0: @@ -1508,25 +1398,25 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) { } define i1 @allones_v64i8_and1(<64 x i8> %arg) { -; SSE2-LABEL: allones_v64i8_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $7, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $7, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: psllw $7, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: psllw $7, %xmm3 -; SSE2-NEXT: pmovmskb %xmm3, %edx -; SSE2-NEXT: shll $16, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: shlq $32, %rdx -; SSE2-NEXT: orq %rcx, %rdx -; SSE2-NEXT: cmpq $-1, %rdx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v64i8_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $7, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: psllw $7, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: psllw $7, %xmm3 +; SSE-NEXT: pmovmskb %xmm3, %edx +; SSE-NEXT: shll $16, %edx +; SSE-NEXT: orl %eax, %edx +; SSE-NEXT: shlq $32, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmpq $-1, %rdx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v64i8_and1: ; AVX1: # %bb.0: @@ -1593,24 +1483,24 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) { } define i1 @allzeros_v64i8_and1(<64 x i8> %arg) { -; SSE2-LABEL: allzeros_v64i8_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $7, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $7, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: psllw $7, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: psllw $7, %xmm3 -; SSE2-NEXT: pmovmskb %xmm3, %edx -; SSE2-NEXT: shll $16, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: shlq $32, %rdx -; SSE2-NEXT: orq %rcx, %rdx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v64i8_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $7, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: psllw $7, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: psllw $7, %xmm3 +; SSE-NEXT: pmovmskb %xmm3, %edx +; SSE-NEXT: shll $16, %edx +; SSE-NEXT: orl %eax, %edx +; SSE-NEXT: shlq $32, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v64i8_and1: ; AVX1: # %bb.0: @@ -1674,23 +1564,23 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) { } define i1 @allones_v8i16_and1(<8 x i16> %arg) { -; SSE2-LABEL: allones_v8i16_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $15, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v8i16_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $15, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpb $-1, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v8i16_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v8i16_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: cmpb $-1, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v8i16_and1: ; KNL: # %bb.0: @@ -1718,21 +1608,21 @@ define i1 @allones_v8i16_and1(<8 x i16> %arg) { } define i1 @allzeros_v8i16_and1(<8 x i16> %arg) { -; SSE2-LABEL: allzeros_v8i16_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $15, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v8i16_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $15, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v8i16_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v8i16_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v8i16_and1: ; KNL: # %bb.0: @@ -1760,15 +1650,15 @@ define i1 @allzeros_v8i16_and1(<8 x i16> %arg) { } define i1 @allones_v16i16_and1(<16 x i16> %arg) { -; SSE2-LABEL: allones_v16i16_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $15, %xmm1 -; SSE2-NEXT: psllw $15, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i16_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm1 +; SSE-NEXT: psllw $15, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v16i16_and1: ; AVX1: # %bb.0: @@ -1819,21 +1709,21 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) { } define i1 @allones_v32i16_and1(<32 x i16> %arg) { -; SSE2-LABEL: allones_v32i16_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $15, %xmm1 -; SSE2-NEXT: psllw $15, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $15, %xmm3 -; SSE2-NEXT: psllw $15, %xmm2 -; SSE2-NEXT: packsswb %xmm3, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: cmpl $-1, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v32i16_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm1 +; SSE-NEXT: psllw $15, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $15, %xmm3 +; SSE-NEXT: psllw $15, %xmm2 +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: cmpl $-1, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v32i16_and1: ; AVX1: # %bb.0: @@ -1900,20 +1790,20 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) { } define i1 @allzeros_v32i16_and1(<32 x i16> %arg) { -; SSE2-LABEL: allzeros_v32i16_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $15, %xmm1 -; SSE2-NEXT: psllw $15, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $15, %xmm3 -; SSE2-NEXT: psllw $15, %xmm2 -; SSE2-NEXT: packsswb %xmm3, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v32i16_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm1 +; SSE-NEXT: psllw $15, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $15, %xmm3 +; SSE-NEXT: psllw $15, %xmm2 +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i16_and1: ; AVX1: # %bb.0: @@ -1978,15 +1868,15 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) { } define i1 @allzeros_v16i16_and1(<16 x i16> %arg) { -; SSE2-LABEL: allzeros_v16i16_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $15, %xmm1 -; SSE2-NEXT: psllw $15, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i16_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllw $15, %xmm1 +; SSE-NEXT: psllw $15, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v16i16_and1: ; AVX1: # %bb.0: @@ -2035,21 +1925,21 @@ define i1 @allzeros_v16i16_and1(<16 x i16> %arg) { } define i1 @allones_v4i32_and1(<4 x i32> %arg) { -; SSE2-LABEL: allones_v4i32_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpb $15, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v4i32_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: cmpb $15, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v4i32_and1: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: cmpb $15, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v4i32_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: cmpb $15, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v4i32_and1: ; KNL: # %bb.0: @@ -2077,21 +1967,21 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) { } define i1 @allzeros_v4i32_and1(<4 x i32> %arg) { -; SSE2-LABEL: allzeros_v4i32_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v4i32_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: testb %al, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v4i32_and1: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v4i32_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: testb %al, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v4i32_and1: ; KNL: # %bb.0: @@ -2117,16 +2007,16 @@ define i1 @allzeros_v4i32_and1(<4 x i32> %arg) { } define i1 @allones_v8i32_and1(<8 x i32> %arg) { -; SSE2-LABEL: allones_v8i32_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm1 -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v8i32_and1: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm1 +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v8i32_and1: ; AVX1: # %bb.0: @@ -2174,15 +2064,15 @@ define i1 @allones_v8i32_and1(<8 x i32> %arg) { } define i1 @allzeros_v8i32_and1(<8 x i32> %arg) { -; SSE2-LABEL: allzeros_v8i32_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm1 -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i32_and1: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm1 +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v8i32_and1: ; AVX1: # %bb.0: @@ -2230,19 +2120,19 @@ define i1 @allzeros_v8i32_and1(<8 x i32> %arg) { } define i1 @allones_v16i32_and1(<16 x i32> %arg) { -; SSE2-LABEL: allones_v16i32_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm3 -; SSE2-NEXT: pslld $31, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: pslld $31, %xmm1 -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i32_and1: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm3 +; SSE-NEXT: pslld $31, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: pslld $31, %xmm1 +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v16i32_and1: ; AVX1: # %bb.0: @@ -2277,21 +2167,13 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: allones_v16i32_and1: -; KNL: # %bb.0: -; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: setb %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v16i32_and1: -; SKX: # %bb.0: -; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; SKX-NEXT: kortestw %k0, %k0 -; SKX-NEXT: setb %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: allones_v16i32_and1: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = and <16 x i32> %arg, %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer %tmp2 = bitcast <16 x i1> %tmp1 to i16 @@ -2300,19 +2182,19 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) { } define i1 @allzeros_v16i32_and1(<16 x i32> %arg) { -; SSE2-LABEL: allzeros_v16i32_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm3 -; SSE2-NEXT: pslld $31, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: pslld $31, %xmm1 -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i32_and1: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm3 +; SSE-NEXT: pslld $31, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: pslld $31, %xmm1 +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v16i32_and1: ; AVX1: # %bb.0: @@ -2345,21 +2227,13 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: allzeros_v16i32_and1: -; KNL: # %bb.0: -; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v16i32_and1: -; SKX: # %bb.0: -; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; SKX-NEXT: kortestw %k0, %k0 -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: allzeros_v16i32_and1: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = and <16 x i32> %arg, %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer %tmp2 = bitcast <16 x i1> %tmp1 to i16 @@ -2368,21 +2242,21 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) { } define i1 @allones_v2i64_and1(<2 x i64> %arg) { -; SSE2-LABEL: allones_v2i64_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $63, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: cmpb $3, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v2i64_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v2i64_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllq $63, %xmm0 +; SSE-NEXT: movmskpd %xmm0, %eax +; SSE-NEXT: cmpb $3, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v2i64_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v2i64_and1: ; KNL: # %bb.0: @@ -2411,21 +2285,21 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) { } define i1 @allzeros_v2i64_and1(<2 x i64> %arg) { -; SSE2-LABEL: allzeros_v2i64_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $63, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v2i64_and1: -; AVX: # %bb.0: -; AVX-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: testb %al, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v2i64_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllq $63, %xmm0 +; SSE-NEXT: movmskpd %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v2i64_and1: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: testb %al, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v2i64_and1: ; KNL: # %bb.0: @@ -2452,15 +2326,15 @@ define i1 @allzeros_v2i64_and1(<2 x i64> %arg) { } define i1 @allones_v4i64_and1(<4 x i64> %arg) { -; SSE2-LABEL: allones_v4i64_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $63, %xmm1 -; SSE2-NEXT: psllq $63, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpb $15, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v4i64_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllq $63, %xmm1 +; SSE-NEXT: psllq $63, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: cmpb $15, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v4i64_and1: ; AVX1: # %bb.0: @@ -2510,15 +2384,15 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) { } define i1 @allzeros_v4i64_and1(<4 x i64> %arg) { -; SSE2-LABEL: allzeros_v4i64_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $63, %xmm1 -; SSE2-NEXT: psllq $63, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v4i64_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllq $63, %xmm1 +; SSE-NEXT: psllq $63, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v4i64_and1: ; AVX1: # %bb.0: @@ -2566,20 +2440,20 @@ define i1 @allzeros_v4i64_and1(<4 x i64> %arg) { } define i1 @allones_v8i64_and1(<8 x i64> %arg) { -; SSE2-LABEL: allones_v8i64_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $63, %xmm3 -; SSE2-NEXT: psllq $63, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: psllq $63, %xmm1 -; SSE2-NEXT: psllq $63, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v8i64_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllq $63, %xmm3 +; SSE-NEXT: psllq $63, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: psllq $63, %xmm1 +; SSE-NEXT: psllq $63, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v8i64_and1: ; AVX1: # %bb.0: @@ -2636,19 +2510,19 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) { } define i1 @allzeros_v8i64_and1(<8 x i64> %arg) { -; SSE2-LABEL: allzeros_v8i64_and1: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $63, %xmm3 -; SSE2-NEXT: psllq $63, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: psllq $63, %xmm1 -; SSE2-NEXT: psllq $63, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i64_and1: +; SSE: # %bb.0: +; SSE-NEXT: psllq $63, %xmm3 +; SSE-NEXT: psllq $63, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: psllq $63, %xmm1 +; SSE-NEXT: psllq $63, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v8i64_and1: ; AVX1: # %bb.0: @@ -2705,21 +2579,21 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) { } define i1 @allones_v16i8_and4(<16 x i8> %arg) { -; SSE2-LABEL: allones_v16i8_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $5, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v16i8_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $5, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpw $-1, %ax -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v16i8_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v16i8_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: cmpw $-1, %ax +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v16i8_and4: ; KNL: # %bb.0: @@ -2735,29 +2609,29 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) { ; SKX-NEXT: kortestw %k0, %k0 ; SKX-NEXT: setb %al ; SKX-NEXT: retq - %tmp = and <16 x i8> %arg, - %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer - %tmp2 = bitcast <16 x i1> %tmp1 to i16 - %tmp3 = icmp eq i16 %tmp2, -1 - ret i1 %tmp3 -} - -define i1 @allzeros_v16i8_and4(<16 x i8> %arg) { -; SSE2-LABEL: allzeros_v16i8_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $5, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v16i8_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $5, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: testw %ax, %ax -; AVX-NEXT: sete %al -; AVX-NEXT: retq + %tmp = and <16 x i8> %arg, + %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer + %tmp2 = bitcast <16 x i1> %tmp1 to i16 + %tmp3 = icmp eq i16 %tmp2, -1 + ret i1 %tmp3 +} + +define i1 @allzeros_v16i8_and4(<16 x i8> %arg) { +; SSE-LABEL: allzeros_v16i8_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v16i8_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: testw %ax, %ax +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v16i8_and4: ; KNL: # %bb.0: @@ -2781,17 +2655,17 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) { } define i1 @allones_v32i8_and4(<32 x i8> %arg) { -; SSE2-LABEL: allones_v32i8_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $5, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $5, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: cmpl $-1, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v32i8_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $5, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: cmpl $-1, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v32i8_and4: ; AVX1: # %bb.0: @@ -2840,16 +2714,16 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) { } define i1 @allzeros_v32i8_and4(<32 x i8> %arg) { -; SSE2-LABEL: allzeros_v32i8_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $5, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $5, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v32i8_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $5, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i8_and4: ; AVX1: # %bb.0: @@ -2897,25 +2771,25 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) { } define i1 @allones_v64i8_and4(<64 x i8> %arg) { -; SSE2-LABEL: allones_v64i8_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $5, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $5, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: psllw $5, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: psllw $5, %xmm3 -; SSE2-NEXT: pmovmskb %xmm3, %edx -; SSE2-NEXT: shll $16, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: shlq $32, %rdx -; SSE2-NEXT: orq %rcx, %rdx -; SSE2-NEXT: cmpq $-1, %rdx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v64i8_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $5, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: psllw $5, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: psllw $5, %xmm3 +; SSE-NEXT: pmovmskb %xmm3, %edx +; SSE-NEXT: shll $16, %edx +; SSE-NEXT: orl %eax, %edx +; SSE-NEXT: shlq $32, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: cmpq $-1, %rdx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v64i8_and4: ; AVX1: # %bb.0: @@ -2982,24 +2856,24 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) { } define i1 @allzeros_v64i8_and4(<64 x i8> %arg) { -; SSE2-LABEL: allzeros_v64i8_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $5, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $5, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: psllw $5, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: psllw $5, %xmm3 -; SSE2-NEXT: pmovmskb %xmm3, %edx -; SSE2-NEXT: shll $16, %edx -; SSE2-NEXT: orl %eax, %edx -; SSE2-NEXT: shlq $32, %rdx -; SSE2-NEXT: orq %rcx, %rdx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v64i8_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $5, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $5, %xmm1 +; SSE-NEXT: pmovmskb %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: psllw $5, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: psllw $5, %xmm3 +; SSE-NEXT: pmovmskb %xmm3, %edx +; SSE-NEXT: shll $16, %edx +; SSE-NEXT: orl %eax, %edx +; SSE-NEXT: shlq $32, %rdx +; SSE-NEXT: orq %rcx, %rdx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v64i8_and4: ; AVX1: # %bb.0: @@ -3063,23 +2937,23 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) { } define i1 @allones_v8i16_and4(<8 x i16> %arg) { -; SSE2-LABEL: allones_v8i16_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $13, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v8i16_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $13, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpb $-1, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v8i16_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $13, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v8i16_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $13, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: cmpb $-1, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v8i16_and4: ; KNL: # %bb.0: @@ -3107,21 +2981,21 @@ define i1 @allones_v8i16_and4(<8 x i16> %arg) { } define i1 @allzeros_v8i16_and4(<8 x i16> %arg) { -; SSE2-LABEL: allzeros_v8i16_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $13, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v8i16_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $13, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v8i16_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $13, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v8i16_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllw $13, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: testl $43690, %eax # imm = 0xAAAA +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v8i16_and4: ; KNL: # %bb.0: @@ -3149,15 +3023,15 @@ define i1 @allzeros_v8i16_and4(<8 x i16> %arg) { } define i1 @allones_v16i16_and4(<16 x i16> %arg) { -; SSE2-LABEL: allones_v16i16_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $13, %xmm1 -; SSE2-NEXT: psllw $13, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i16_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $13, %xmm1 +; SSE-NEXT: psllw $13, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v16i16_and4: ; AVX1: # %bb.0: @@ -3208,21 +3082,21 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) { } define i1 @allones_v32i16_and4(<32 x i16> %arg) { -; SSE2-LABEL: allones_v32i16_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $13, %xmm1 -; SSE2-NEXT: psllw $13, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $13, %xmm3 -; SSE2-NEXT: psllw $13, %xmm2 -; SSE2-NEXT: packsswb %xmm3, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: cmpl $-1, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v32i16_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $13, %xmm1 +; SSE-NEXT: psllw $13, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $13, %xmm3 +; SSE-NEXT: psllw $13, %xmm2 +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: cmpl $-1, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v32i16_and4: ; AVX1: # %bb.0: @@ -3289,20 +3163,20 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) { } define i1 @allzeros_v32i16_and4(<32 x i16> %arg) { -; SSE2-LABEL: allzeros_v32i16_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $13, %xmm1 -; SSE2-NEXT: psllw $13, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: psllw $13, %xmm3 -; SSE2-NEXT: psllw $13, %xmm2 -; SSE2-NEXT: packsswb %xmm3, %xmm2 -; SSE2-NEXT: pmovmskb %xmm2, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: orl %eax, %ecx -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v32i16_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $13, %xmm1 +; SSE-NEXT: psllw $13, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: psllw $13, %xmm3 +; SSE-NEXT: psllw $13, %xmm2 +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: orl %eax, %ecx +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v32i16_and4: ; AVX1: # %bb.0: @@ -3367,15 +3241,15 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) { } define i1 @allzeros_v16i16_and4(<16 x i16> %arg) { -; SSE2-LABEL: allzeros_v16i16_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllw $13, %xmm1 -; SSE2-NEXT: psllw $13, %xmm0 -; SSE2-NEXT: packsswb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i16_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllw $13, %xmm1 +; SSE-NEXT: psllw $13, %xmm0 +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v16i16_and4: ; AVX1: # %bb.0: @@ -3424,21 +3298,21 @@ define i1 @allzeros_v16i16_and4(<16 x i16> %arg) { } define i1 @allones_v4i32_and4(<4 x i32> %arg) { -; SSE2-LABEL: allones_v4i32_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $29, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpb $15, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v4i32_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpslld $29, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: cmpb $15, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v4i32_and4: +; SSE: # %bb.0: +; SSE-NEXT: pslld $29, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: cmpb $15, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v4i32_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: cmpb $15, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v4i32_and4: ; KNL: # %bb.0: @@ -3466,21 +3340,21 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) { } define i1 @allzeros_v4i32_and4(<4 x i32> %arg) { -; SSE2-LABEL: allzeros_v4i32_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $29, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v4i32_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpslld $29, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: testb %al, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v4i32_and4: +; SSE: # %bb.0: +; SSE-NEXT: pslld $29, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v4i32_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: testb %al, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v4i32_and4: ; KNL: # %bb.0: @@ -3506,16 +3380,16 @@ define i1 @allzeros_v4i32_and4(<4 x i32> %arg) { } define i1 @allones_v8i32_and4(<8 x i32> %arg) { -; SSE2-LABEL: allones_v8i32_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $29, %xmm1 -; SSE2-NEXT: pslld $29, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v8i32_and4: +; SSE: # %bb.0: +; SSE-NEXT: pslld $29, %xmm1 +; SSE-NEXT: pslld $29, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v8i32_and4: ; AVX1: # %bb.0: @@ -3563,15 +3437,15 @@ define i1 @allones_v8i32_and4(<8 x i32> %arg) { } define i1 @allzeros_v8i32_and4(<8 x i32> %arg) { -; SSE2-LABEL: allzeros_v8i32_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $29, %xmm1 -; SSE2-NEXT: pslld $29, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i32_and4: +; SSE: # %bb.0: +; SSE-NEXT: pslld $29, %xmm1 +; SSE-NEXT: pslld $29, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v8i32_and4: ; AVX1: # %bb.0: @@ -3619,19 +3493,19 @@ define i1 @allzeros_v8i32_and4(<8 x i32> %arg) { } define i1 @allones_v16i32_and4(<16 x i32> %arg) { -; SSE2-LABEL: allones_v16i32_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $29, %xmm3 -; SSE2-NEXT: pslld $29, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: pslld $29, %xmm1 -; SSE2-NEXT: pslld $29, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpw $-1, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v16i32_and4: +; SSE: # %bb.0: +; SSE-NEXT: pslld $29, %xmm3 +; SSE-NEXT: pslld $29, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: pslld $29, %xmm1 +; SSE-NEXT: pslld $29, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpw $-1, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v16i32_and4: ; AVX1: # %bb.0: @@ -3666,21 +3540,13 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: allones_v16i32_and4: -; KNL: # %bb.0: -; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: setb %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allones_v16i32_and4: -; SKX: # %bb.0: -; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; SKX-NEXT: kortestw %k0, %k0 -; SKX-NEXT: setb %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: allones_v16i32_and4: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = and <16 x i32> %arg, %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer %tmp2 = bitcast <16 x i1> %tmp1 to i16 @@ -3689,19 +3555,19 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) { } define i1 @allzeros_v16i32_and4(<16 x i32> %arg) { -; SSE2-LABEL: allzeros_v16i32_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $29, %xmm3 -; SSE2-NEXT: pslld $29, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: pslld $29, %xmm1 -; SSE2-NEXT: pslld $29, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testw %ax, %ax -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v16i32_and4: +; SSE: # %bb.0: +; SSE-NEXT: pslld $29, %xmm3 +; SSE-NEXT: pslld $29, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: pslld $29, %xmm1 +; SSE-NEXT: pslld $29, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v16i32_and4: ; AVX1: # %bb.0: @@ -3734,21 +3600,13 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: allzeros_v16i32_and4: -; KNL: # %bb.0: -; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: allzeros_v16i32_and4: -; SKX: # %bb.0: -; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 -; SKX-NEXT: kortestw %k0, %k0 -; SKX-NEXT: sete %al -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: allzeros_v16i32_and4: +; AVX512: # %bb.0: +; AVX512-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = and <16 x i32> %arg, %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer %tmp2 = bitcast <16 x i1> %tmp1 to i16 @@ -3757,21 +3615,21 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) { } define i1 @allones_v2i64_and4(<2 x i64> %arg) { -; SSE2-LABEL: allones_v2i64_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $61, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: cmpb $3, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allones_v2i64_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpsllq $61, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allones_v2i64_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllq $61, %xmm0 +; SSE-NEXT: movmskpd %xmm0, %eax +; SSE-NEXT: cmpb $3, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allones_v2i64_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v2i64_and4: ; KNL: # %bb.0: @@ -3800,21 +3658,21 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) { } define i1 @allzeros_v2i64_and4(<2 x i64> %arg) { -; SSE2-LABEL: allzeros_v2i64_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $61, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: allzeros_v2i64_and4: -; AVX: # %bb.0: -; AVX-NEXT: vpsllq $61, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: testb %al, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: allzeros_v2i64_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllq $61, %xmm0 +; SSE-NEXT: movmskpd %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: allzeros_v2i64_and4: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: testb %al, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allzeros_v2i64_and4: ; KNL: # %bb.0: @@ -3841,15 +3699,15 @@ define i1 @allzeros_v2i64_and4(<2 x i64> %arg) { } define i1 @allones_v4i64_and4(<4 x i64> %arg) { -; SSE2-LABEL: allones_v4i64_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $61, %xmm1 -; SSE2-NEXT: psllq $61, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpb $15, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v4i64_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllq $61, %xmm1 +; SSE-NEXT: psllq $61, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: cmpb $15, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v4i64_and4: ; AVX1: # %bb.0: @@ -3899,15 +3757,15 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) { } define i1 @allzeros_v4i64_and4(<4 x i64> %arg) { -; SSE2-LABEL: allzeros_v4i64_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $61, %xmm1 -; SSE2-NEXT: psllq $61, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v4i64_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllq $61, %xmm1 +; SSE-NEXT: psllq $61, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v4i64_and4: ; AVX1: # %bb.0: @@ -3955,20 +3813,20 @@ define i1 @allzeros_v4i64_and4(<4 x i64> %arg) { } define i1 @allones_v8i64_and4(<8 x i64> %arg) { -; SSE2-LABEL: allones_v8i64_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $61, %xmm3 -; SSE2-NEXT: psllq $61, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: psllq $61, %xmm1 -; SSE2-NEXT: psllq $61, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpb $-1, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allones_v8i64_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllq $61, %xmm3 +; SSE-NEXT: psllq $61, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: psllq $61, %xmm1 +; SSE-NEXT: psllq $61, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: cmpb $-1, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allones_v8i64_and4: ; AVX1: # %bb.0: @@ -4025,19 +3883,19 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) { } define i1 @allzeros_v8i64_and4(<8 x i64> %arg) { -; SSE2-LABEL: allzeros_v8i64_and4: -; SSE2: # %bb.0: -; SSE2-NEXT: psllq $61, %xmm3 -; SSE2-NEXT: psllq $61, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 -; SSE2-NEXT: psllq $61, %xmm1 -; SSE2-NEXT: psllq $61, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packssdw %xmm2, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq +; SSE-LABEL: allzeros_v8i64_and4: +; SSE: # %bb.0: +; SSE-NEXT: psllq $61, %xmm3 +; SSE-NEXT: psllq $61, %xmm2 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: psllq $61, %xmm1 +; SSE-NEXT: psllq $61, %xmm0 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm2, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: testl $43690, %eax # imm = 0xAAAA +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: allzeros_v8i64_and4: ; AVX1: # %bb.0: @@ -4097,25 +3955,15 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) { ; MOVMSK instruction. define i32 @movmskpd(<2 x double> %x) { -; SSE2-LABEL: movmskpd: -; SSE2: # %bb.0: -; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: retq +; SSE-LABEL: movmskpd: +; SSE: # %bb.0: +; SSE-NEXT: movmskpd %xmm0, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: movmskpd: ; AVX: # %bb.0: ; AVX-NEXT: vmovmskpd %xmm0, %eax ; AVX-NEXT: retq -; -; KNL-LABEL: movmskpd: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskpd %xmm0, %eax -; KNL-NEXT: retq -; -; SKX-LABEL: movmskpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %xmm0, %eax -; SKX-NEXT: retq %a = bitcast <2 x double> %x to <2 x i64> %b = icmp slt <2 x i64> %a, zeroinitializer %c = bitcast <2 x i1> %b to i2 @@ -4124,25 +3972,15 @@ define i32 @movmskpd(<2 x double> %x) { } define i32 @movmskps(<4 x float> %x) { -; SSE2-LABEL: movmskps: -; SSE2: # %bb.0: -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: retq +; SSE-LABEL: movmskps: +; SSE: # %bb.0: +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: movmskps: ; AVX: # %bb.0: ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: retq -; -; KNL-LABEL: movmskps: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskps %xmm0, %eax -; KNL-NEXT: retq -; -; SKX-LABEL: movmskps: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %xmm0, %eax -; SKX-NEXT: retq %a = bitcast <4 x float> %x to <4 x i32> %b = icmp slt <4 x i32> %a, zeroinitializer %c = bitcast <4 x i1> %b to i4 @@ -4151,29 +3989,17 @@ define i32 @movmskps(<4 x float> %x) { } define i32 @movmskpd256(<4 x double> %x) { -; SSE2-LABEL: movmskpd256: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: retq +; SSE-LABEL: movmskpd256: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: movmskpd256: ; AVX: # %bb.0: ; AVX-NEXT: vmovmskpd %ymm0, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq -; -; KNL-LABEL: movmskpd256: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskpd %ymm0, %eax -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: movmskpd256: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %ymm0, %eax -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq %a = bitcast <4 x double> %x to <4 x i64> %b = icmp slt <4 x i64> %a, zeroinitializer %c = bitcast <4 x i1> %b to i4 @@ -4182,31 +4008,19 @@ define i32 @movmskpd256(<4 x double> %x) { } define i32 @movmskps256(<8 x float> %x) { -; SSE2-LABEL: movmskps256: -; SSE2: # %bb.0: -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: movzbl %al, %eax -; SSE2-NEXT: retq +; SSE-LABEL: movmskps256: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: movmskps256: ; AVX: # %bb.0: ; AVX-NEXT: vmovmskps %ymm0, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq -; -; KNL-LABEL: movmskps256: -; KNL: # %bb.0: -; KNL-NEXT: vmovmskps %ymm0, %eax -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: movmskps256: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %ymm0, %eax -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq %a = bitcast <8 x float> %x to <8 x i32> %b = icmp slt <8 x i32> %a, zeroinitializer %c = bitcast <8 x i1> %b to i8 @@ -4215,25 +4029,15 @@ define i32 @movmskps256(<8 x float> %x) { } define i32 @movmskb(<16 x i8> %x) { -; SSE2-LABEL: movmskb: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: retq +; SSE-LABEL: movmskb: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: movmskb: ; AVX: # %bb.0: ; AVX-NEXT: vpmovmskb %xmm0, %eax ; AVX-NEXT: retq -; -; KNL-LABEL: movmskb: -; KNL: # %bb.0: -; KNL-NEXT: vpmovmskb %xmm0, %eax -; KNL-NEXT: retq -; -; SKX-LABEL: movmskb: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %xmm0, %eax -; SKX-NEXT: retq %a = icmp slt <16 x i8> %x, zeroinitializer %b = bitcast <16 x i1> %a to i16 %c = zext i16 %b to i32 @@ -4241,13 +4045,13 @@ define i32 @movmskb(<16 x i8> %x) { } define i32 @movmskb256(<32 x i8> %x) { -; SSE2-LABEL: movmskb256: -; SSE2: # %bb.0: -; SSE2-NEXT: pmovmskb %xmm0, %ecx -; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: orl %ecx, %eax -; SSE2-NEXT: retq +; SSE-LABEL: movmskb256: +; SSE: # %bb.0: +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: pmovmskb %xmm1, %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: orl %ecx, %eax +; SSE-NEXT: retq ; ; AVX1-LABEL: movmskb256: ; AVX1: # %bb.0: @@ -4265,17 +4069,11 @@ define i32 @movmskb256(<32 x i8> %x) { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; KNL-LABEL: movmskb256: -; KNL: # %bb.0: -; KNL-NEXT: vpmovmskb %ymm0, %eax -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: movmskb256: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %ymm0, %eax -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: movmskb256: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovmskb %ymm0, %eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %a = icmp slt <32 x i8> %x, zeroinitializer %b = bitcast <32 x i1> %a to i32 ret i32 %b @@ -4284,37 +4082,37 @@ define i32 @movmskb256(<32 x i8> %x) { ; Multiple extract elements from a vector compare. define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) { -; SSE2-LABEL: movmsk_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $15, %ecx -; SSE2-NEXT: movl %eax, %edx -; SSE2-NEXT: shrl $8, %edx -; SSE2-NEXT: andl $1, %edx -; SSE2-NEXT: andl $8, %eax -; SSE2-NEXT: shrl $3, %eax -; SSE2-NEXT: xorl %edx, %eax -; SSE2-NEXT: andl %ecx, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; AVX-LABEL: movmsk_v16i8: -; AVX: # %bb.0: -; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrl $15, %ecx -; AVX-NEXT: movl %eax, %edx -; AVX-NEXT: shrl $8, %edx -; AVX-NEXT: andl $1, %edx -; AVX-NEXT: andl $8, %eax -; AVX-NEXT: shrl $3, %eax -; AVX-NEXT: xorl %edx, %eax -; AVX-NEXT: andl %ecx, %eax -; AVX-NEXT: # kill: def $al killed $al killed $eax -; AVX-NEXT: retq +; SSE-LABEL: movmsk_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: shrl $15, %ecx +; SSE-NEXT: movl %eax, %edx +; SSE-NEXT: shrl $8, %edx +; SSE-NEXT: andl $1, %edx +; SSE-NEXT: andl $8, %eax +; SSE-NEXT: shrl $3, %eax +; SSE-NEXT: xorl %edx, %eax +; SSE-NEXT: andl %ecx, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_v16i8: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: movl %eax, %ecx +; AVX1OR2-NEXT: shrl $15, %ecx +; AVX1OR2-NEXT: movl %eax, %edx +; AVX1OR2-NEXT: shrl $8, %edx +; AVX1OR2-NEXT: andl $1, %edx +; AVX1OR2-NEXT: andl $8, %eax +; AVX1OR2-NEXT: shrl $3, %eax +; AVX1OR2-NEXT: xorl %edx, %eax +; AVX1OR2-NEXT: andl %ecx, %eax +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_v16i8: ; KNL: # %bb.0: @@ -4356,25 +4154,25 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) { } define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) { -; SSE2-LABEL: movmsk_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE2-NEXT: packsswb %xmm0, %xmm0 -; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: andb $-109, %al -; SSE2-NEXT: cmpb $-109, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: movmsk_v8i16: -; AVX: # %bb.0: -; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: andb $-109, %al -; AVX-NEXT: cmpb $-109, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: movmsk_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: andb $-109, %al +; SSE-NEXT: cmpb $-109, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_v8i16: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax +; AVX1OR2-NEXT: andb $-109, %al +; AVX1OR2-NEXT: cmpb $-109, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_v8i16: ; KNL: # %bb.0: @@ -4409,29 +4207,29 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) { ; TODO: Replace shift+mask chain with AND+CMP. define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) { -; SSE2-LABEL: movmsk_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: movmskps %xmm1, %eax -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrb $3, %cl -; SSE2-NEXT: andb $4, %al -; SSE2-NEXT: shrb $2, %al -; SSE2-NEXT: xorb %cl, %al -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; AVX-LABEL: movmsk_v4i32: -; AVX: # %bb.0: -; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrb $3, %cl -; AVX-NEXT: andb $4, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: xorb %cl, %al -; AVX-NEXT: # kill: def $al killed $al killed $eax -; AVX-NEXT: retq +; SSE-LABEL: movmsk_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE-NEXT: movmskps %xmm1, %eax +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: shrb $3, %cl +; SSE-NEXT: andb $4, %al +; SSE-NEXT: shrb $2, %al +; SSE-NEXT: xorb %cl, %al +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_v4i32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: movl %eax, %ecx +; AVX1OR2-NEXT: shrb $3, %cl +; AVX1OR2-NEXT: andb $4, %al +; AVX1OR2-NEXT: shrb $2, %al +; AVX1OR2-NEXT: xorb %cl, %al +; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_v4i32: ; KNL: # %bb.0: @@ -4476,14 +4274,23 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; -; AVX-LABEL: movmsk_and_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: xorl $3, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE41-LABEL: movmsk_and_v2i64: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 +; SSE41-NEXT: movmskpd %xmm0, %eax +; SSE41-NEXT: xorl $3, %eax +; SSE41-NEXT: cmpb $3, %al +; SSE41-NEXT: sete %al +; SSE41-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_and_v2i64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: xorl $3, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_and_v2i64: ; KNL: # %bb.0: @@ -4522,13 +4329,21 @@ define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; -; AVX-LABEL: movmsk_or_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: xorb $3, %al -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; SSE41-LABEL: movmsk_or_v2i64: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 +; SSE41-NEXT: movmskpd %xmm0, %eax +; SSE41-NEXT: xorb $3, %al +; SSE41-NEXT: setne %al +; SSE41-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_or_v2i64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: xorb $3, %al +; AVX1OR2-NEXT: setne %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_or_v2i64: ; KNL: # %bb.0: @@ -4555,24 +4370,24 @@ define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) { } define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) { -; SSE2-LABEL: movmsk_v4f32: -; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpeqps %xmm1, %xmm2 -; SSE2-NEXT: cmpunordps %xmm1, %xmm0 -; SSE2-NEXT: orps %xmm2, %xmm0 -; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: testb $14, %al -; SSE2-NEXT: setne %al -; SSE2-NEXT: retq -; -; AVX-LABEL: movmsk_v4f32: -; AVX: # %bb.0: -; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: testb $14, %al -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; SSE-LABEL: movmsk_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: cmpeqps %xmm1, %xmm2 +; SSE-NEXT: cmpunordps %xmm1, %xmm0 +; SSE-NEXT: orps %xmm2, %xmm0 +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: testb $14, %al +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_v4f32: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0 +; AVX1OR2-NEXT: vmovmskps %xmm0, %eax +; AVX1OR2-NEXT: testb $14, %al +; AVX1OR2-NEXT: setne %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_v4f32: ; KNL: # %bb.0: @@ -4602,21 +4417,21 @@ define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) { } define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) { -; SSE2-LABEL: movmsk_and_v2f64: -; SSE2: # %bb.0: -; SSE2-NEXT: cmplepd %xmm0, %xmm1 -; SSE2-NEXT: movmskpd %xmm1, %eax -; SSE2-NEXT: cmpb $3, %al -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; AVX-LABEL: movmsk_and_v2f64: -; AVX: # %bb.0: -; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: sete %al -; AVX-NEXT: retq +; SSE-LABEL: movmsk_and_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: cmplepd %xmm0, %xmm1 +; SSE-NEXT: movmskpd %xmm1, %eax +; SSE-NEXT: cmpb $3, %al +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_and_v2f64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_and_v2f64: ; KNL: # %bb.0: @@ -4645,21 +4460,21 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) { } define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) { -; SSE2-LABEL: movmsk_or_v2f64: -; SSE2: # %bb.0: -; SSE2-NEXT: cmplepd %xmm0, %xmm1 -; SSE2-NEXT: movmskpd %xmm1, %eax -; SSE2-NEXT: testb %al, %al -; SSE2-NEXT: setne %al -; SSE2-NEXT: retq -; -; AVX-LABEL: movmsk_or_v2f64: -; AVX: # %bb.0: -; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: testb %al, %al -; AVX-NEXT: setne %al -; AVX-NEXT: retq +; SSE-LABEL: movmsk_or_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: cmplepd %xmm0, %xmm1 +; SSE-NEXT: movmskpd %xmm1, %eax +; SSE-NEXT: testb %al, %al +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: movmsk_or_v2f64: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: testb %al, %al +; AVX1OR2-NEXT: setne %al +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_or_v2f64: ; KNL: # %bb.0: @@ -4686,25 +4501,25 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) { } define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) { -; SSE2-LABEL: PR39665_c_ray: -; SSE2: # %bb.0: -; SSE2-NEXT: cmpltpd %xmm0, %xmm1 -; SSE2-NEXT: movmskpd %xmm1, %eax -; SSE2-NEXT: cmpb $3, %al -; SSE2-NEXT: movl $42, %ecx -; SSE2-NEXT: movl $99, %eax -; SSE2-NEXT: cmovel %ecx, %eax -; SSE2-NEXT: retq -; -; AVX-LABEL: PR39665_c_ray: -; AVX: # %bb.0: -; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpb $3, %al -; AVX-NEXT: movl $42, %ecx -; AVX-NEXT: movl $99, %eax -; AVX-NEXT: cmovel %ecx, %eax -; AVX-NEXT: retq +; SSE-LABEL: PR39665_c_ray: +; SSE: # %bb.0: +; SSE-NEXT: cmpltpd %xmm0, %xmm1 +; SSE-NEXT: movmskpd %xmm1, %eax +; SSE-NEXT: cmpb $3, %al +; SSE-NEXT: movl $42, %ecx +; SSE-NEXT: movl $99, %eax +; SSE-NEXT: cmovel %ecx, %eax +; SSE-NEXT: retq +; +; AVX1OR2-LABEL: PR39665_c_ray: +; AVX1OR2: # %bb.0: +; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax +; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: movl $42, %ecx +; AVX1OR2-NEXT: movl $99, %eax +; AVX1OR2-NEXT: cmovel %ecx, %eax +; AVX1OR2-NEXT: retq ; ; KNL-LABEL: PR39665_c_ray: ; KNL: # %bb.0: