diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll index c2dd3539e71e1..212d9764622de 100644 --- a/llvm/test/CodeGen/X86/v8i1-masks.ll +++ b/llvm/test/CodeGen/X86/v8i1-masks.ll @@ -1,25 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X86-AVX512 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=X64-AVX512 define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { -; X32-LABEL: and_masks: -; X32: ## %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: vmovups (%edx), %ymm0 -; X32-NEXT: vmovups (%ecx), %ymm1 -; X32-NEXT: vcmpltps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vmovups (%eax), %ymm2 -; X32-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vandps %ymm1, %ymm0, %ymm0 -; X32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vmovaps %ymm0, (%eax) -; X32-NEXT: vzeroupper -; X32-NEXT: retl +; X86-LABEL: and_masks: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: vmovups (%edx), %ymm0 +; X86-NEXT: vmovups (%ecx), %ymm1 +; X86-NEXT: vcmpltps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vmovups (%eax), %ymm2 +; X86-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vmovaps %ymm0, (%eax) +; X86-NEXT: vzeroupper +; X86-NEXT: retl ; ; X64-LABEL: and_masks: ; X64: ## %bb.0: @@ -34,21 +36,21 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { ; X64-NEXT: vzeroupper ; X64-NEXT: retq ; -; X32-AVX2-LABEL: and_masks: -; X32-AVX2: ## %bb.0: -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-AVX2-NEXT: vmovups (%edx), %ymm0 -; X32-AVX2-NEXT: vmovups (%ecx), %ymm1 -; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vmovups (%eax), %ymm2 -; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0 -; X32-AVX2-NEXT: vpsrld $31, %ymm0, %ymm0 -; X32-AVX2-NEXT: vmovdqa %ymm0, (%eax) -; X32-AVX2-NEXT: vzeroupper -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: and_masks: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX2-NEXT: vmovups (%edx), %ymm0 +; X86-AVX2-NEXT: vmovups (%ecx), %ymm1 +; X86-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vmovups (%eax), %ymm2 +; X86-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpsrld $31, %ymm0, %ymm0 +; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax) +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: and_masks: ; X64-AVX2: ## %bb.0: @@ -62,6 +64,33 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { ; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax) ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: and_masks: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX512-NEXT: vmovups (%edx), %ymm0 +; X86-AVX512-NEXT: vcmpgtps (%ecx), %ymm0, %k1 +; X86-AVX512-NEXT: vcmpgtps (%eax), %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa %ymm0, (%eax) +; X86-AVX512-NEXT: vzeroupper +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: and_masks: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vmovups (%rdi), %ymm0 +; X64-AVX512-NEXT: vcmpgtps (%rdx), %ymm0, %k1 +; X64-AVX512-NEXT: vcmpgtps (%rsi), %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa %ymm0, (%rax) +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq %v0 = load <8 x float>, ptr %a, align 16 %v1 = load <8 x float>, ptr %b, align 16 %m0 = fcmp olt <8 x float> %v1, %v0 @@ -74,16 +103,16 @@ define void @and_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { } define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { -; X32-LABEL: neg_masks: -; X32: ## %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: vmovups (%ecx), %ymm0 -; X32-NEXT: vcmpnltps (%eax), %ymm0, %ymm0 -; X32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vmovaps %ymm0, (%eax) -; X32-NEXT: vzeroupper -; X32-NEXT: retl +; X86-LABEL: neg_masks: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovups (%ecx), %ymm0 +; X86-NEXT: vcmpnltps (%eax), %ymm0, %ymm0 +; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vmovaps %ymm0, (%eax) +; X86-NEXT: vzeroupper +; X86-NEXT: retl ; ; X64-LABEL: neg_masks: ; X64: ## %bb.0: @@ -94,17 +123,17 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { ; X64-NEXT: vzeroupper ; X64-NEXT: retq ; -; X32-AVX2-LABEL: neg_masks: -; X32-AVX2: ## %bb.0: -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX2-NEXT: vmovups (%ecx), %ymm0 -; X32-AVX2-NEXT: vcmpnltps (%eax), %ymm0, %ymm0 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] -; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0 -; X32-AVX2-NEXT: vmovaps %ymm0, (%eax) -; X32-AVX2-NEXT: vzeroupper -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: neg_masks: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX2-NEXT: vmovups (%ecx), %ymm0 +; X86-AVX2-NEXT: vcmpnltps (%eax), %ymm0, %ymm0 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] +; X86-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vmovaps %ymm0, (%eax) +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: neg_masks: ; X64-AVX2: ## %bb.0: @@ -115,6 +144,30 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { ; X64-AVX2-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: neg_masks: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX512-NEXT: vmovups (%ecx), %ymm0 +; X86-AVX512-NEXT: vcmpnltps (%eax), %ymm0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa %ymm0, (%eax) +; X86-AVX512-NEXT: vzeroupper +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: neg_masks: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vmovups (%rsi), %ymm0 +; X64-AVX512-NEXT: vcmpnltps (%rdi), %ymm0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa %ymm0, (%rax) +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq %v0 = load <8 x float>, ptr %a, align 16 %v1 = load <8 x float>, ptr %b, align 16 %m0 = fcmp olt <8 x float> %v1, %v0 @@ -125,15 +178,15 @@ define void @neg_masks(ptr %a, ptr %b, ptr %c) nounwind uwtable noinline ssp { } define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) { -; X32-LABEL: and_mask_constant: -; X32: ## %bb.0: -; X32-NEXT: vextractf128 $1, %ymm0, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: retl +; X86-LABEL: and_mask_constant: +; X86: ## %bb.0: +; X86-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 +; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: and_mask_constant: ; X64: ## %bb.0: @@ -145,12 +198,12 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) { ; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: and_mask_constant: -; X32-AVX2: ## %bb.0: -; X32-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; X32-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: and_mask_constant: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: and_mask_constant: ; X64-AVX2: ## %bb.0: @@ -158,6 +211,26 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) { ; X64-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: and_mask_constant: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movb $105, %al +; X86-AVX512-NEXT: kmovw %eax, %k1 +; X86-AVX512-NEXT: vptestnmd %ymm0, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: and_mask_constant: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: movb $105, %al +; X64-AVX512-NEXT: kmovw %eax, %k1 +; X64-AVX512-NEXT: vptestnmd %ymm0, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: vpsrld $31, %ymm0, %ymm0 +; X64-AVX512-NEXT: retq %m = icmp eq <8 x i32> %v0, zeroinitializer %mand = and <8 x i1> %m, %r = zext <8 x i1> %mand to <8 x i32> @@ -165,13 +238,13 @@ define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) { } define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 { -; X32-LABEL: two_ands: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: two_ands: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: two_ands: ; X64: ## %bb.0: ## %entry @@ -181,14 +254,14 @@ define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 { ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: two_ands: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: two_ands: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: two_ands: ; X64-AVX2: ## %bb.0: ## %entry @@ -198,6 +271,22 @@ define <8 x i32> @two_ands(<8 x float> %x) local_unnamed_addr #0 { ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: two_ands: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: two_ands: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -207,16 +296,16 @@ entry: } define <8 x i32> @three_ands(<8 x float> %x) { -; X32-LABEL: three_ands: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: three_ands: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: three_ands: ; X64: ## %bb.0: ## %entry @@ -229,17 +318,17 @@ define <8 x i32> @three_ands(<8 x float> %x) { ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: three_ands: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: three_ands: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: three_ands: ; X64-AVX2: ## %bb.0: ## %entry @@ -252,6 +341,26 @@ define <8 x i32> @three_ands(<8 x float> %x) { ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: three_ands: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: three_ands: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -263,18 +372,18 @@ entry: } define <8 x i32> @four_ands(<8 x float> %x) { -; X32-LABEL: four_ands: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: four_ands: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: four_ands: ; X64: ## %bb.0: ## %entry @@ -289,20 +398,20 @@ define <8 x i32> @four_ands(<8 x float> %x) { ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: four_ands: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: four_ands: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_ands: ; X64-AVX2: ## %bb.0: ## %entry @@ -318,6 +427,28 @@ define <8 x i32> @four_ands(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: four_ands: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: four_ands: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -331,20 +462,20 @@ entry: } define <8 x i32> @five_ands(<8 x float> %x) { -; X32-LABEL: five_ands: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: five_ands: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: five_ands: ; X64: ## %bb.0: ## %entry @@ -361,23 +492,23 @@ define <8 x i32> @five_ands(<8 x float> %x) { ; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: five_ands: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: five_ands: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_ands: ; X64-AVX2: ## %bb.0: ## %entry @@ -396,6 +527,30 @@ define <8 x i32> @five_ands(<8 x float> %x) { ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: five_ands: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: five_ands: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -411,13 +566,13 @@ entry: } define <8 x i32> @two_or(<8 x float> %x) { -; X32-LABEL: two_or: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: two_or: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: two_or: ; X64: ## %bb.0: ## %entry @@ -427,14 +582,14 @@ define <8 x i32> @two_or(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: two_or: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: two_or: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: two_or: ; X64-AVX2: ## %bb.0: ## %entry @@ -444,6 +599,24 @@ define <8 x i32> @two_or(<8 x float> %x) { ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: two_or: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: two_or: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -453,16 +626,16 @@ entry: } define <8 x i32> @three_or(<8 x float> %x) { -; X32-LABEL: three_or: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: three_or: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: three_or: ; X64: ## %bb.0: ## %entry @@ -475,17 +648,17 @@ define <8 x i32> @three_or(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: three_or: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: three_or: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: three_or: ; X64-AVX2: ## %bb.0: ## %entry @@ -498,6 +671,30 @@ define <8 x i32> @three_or(<8 x float> %x) { ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: three_or: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: three_or: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -509,18 +706,18 @@ entry: } ; Function Attrs: norecurse nounwind readnone ssp uwtable define <8 x i32> @four_or(<8 x float> %x) { -; X32-LABEL: four_or: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: four_or: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vorps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: four_or: ; X64: ## %bb.0: ## %entry @@ -535,20 +732,20 @@ define <8 x i32> @four_or(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: four_or: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: four_or: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_or: ; X64-AVX2: ## %bb.0: ## %entry @@ -564,6 +761,34 @@ define <8 x i32> @four_or(<8 x float> %x) { ; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: four_or: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k0 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: four_or: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k0 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -577,20 +802,20 @@ entry: } ; Function Attrs: norecurse nounwind readnone ssp uwtable define <8 x i32> @five_or(<8 x float> %x) { -; X32-LABEL: five_or: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: five_or: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vorps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: five_or: ; X64: ## %bb.0: ## %entry @@ -607,23 +832,23 @@ define <8 x i32> @five_or(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: five_or: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: five_or: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 +; X86-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_or: ; X64-AVX2: ## %bb.0: ## %entry @@ -642,6 +867,38 @@ define <8 x i32> @five_or(<8 x float> %x) { ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: five_or: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k0 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k0 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: five_or: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k0 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k0 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -657,16 +914,16 @@ entry: } define <8 x i32> @three_or_and(<8 x float> %x) { -; X32-LABEL: three_or_and: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X32-NEXT: retl +; X86-LABEL: three_or_and: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm1, %ymm0, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: three_or_and: ; X64: ## %bb.0: ## %entry @@ -679,17 +936,17 @@ define <8 x i32> @three_or_and(<8 x float> %x) { ; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: three_or_and: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: three_or_and: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: three_or_and: ; X64-AVX2: ## %bb.0: ## %entry @@ -702,6 +959,28 @@ define <8 x i32> @three_or_and(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: three_or_and: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: korw %k0, %k1, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: three_or_and: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: korw %k0, %k1, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -713,18 +992,18 @@ entry: } define <8 x i32> @four_or_and(<8 x float> %x) { -; X32-LABEL: four_or_and: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: four_or_and: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: four_or_and: ; X64: ## %bb.0: ## %entry @@ -739,20 +1018,20 @@ define <8 x i32> @four_or_and(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: four_or_and: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: four_or_and: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_or_and: ; X64-AVX2: ## %bb.0: ## %entry @@ -768,6 +1047,30 @@ define <8 x i32> @four_or_and(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: four_or_and: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 {%k1} +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: four_or_and: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 {%k1} +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -781,20 +1084,20 @@ entry: } define <8 x i32> @five_or_and(<8 x float> %x) { -; X32-LABEL: five_or_and: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vorps %ymm1, %ymm2, %ymm1 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: five_or_and: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vorps %ymm1, %ymm2, %ymm1 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: five_or_and: ; X64: ## %bb.0: ## %entry @@ -811,23 +1114,23 @@ define <8 x i32> @five_or_and(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: five_or_and: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: five_or_and: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_or_and: ; X64-AVX2: ## %bb.0: ## %entry @@ -846,6 +1149,34 @@ define <8 x i32> @five_or_and(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: five_or_and: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: korw %k0, %k1, %k0 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: five_or_and: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: korw %k0, %k1, %k0 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -861,18 +1192,18 @@ entry: } define <8 x i32> @four_or_and_xor(<8 x float> %x) { -; X32-LABEL: four_or_and_xor: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: four_or_and_xor: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: four_or_and_xor: ; X64: ## %bb.0: ## %entry @@ -887,20 +1218,20 @@ define <8 x i32> @four_or_and_xor(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: four_or_and_xor: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: four_or_and_xor: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_or_and_xor: ; X64-AVX2: ## %bb.0: ## %entry @@ -916,6 +1247,32 @@ define <8 x i32> @four_or_and_xor(<8 x float> %x) { ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: four_or_and_xor: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: kxorw %k1, %k0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: four_or_and_xor: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: kxorw %k1, %k0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -929,20 +1286,20 @@ entry: } ; Function Attrs: norecurse nounwind readnone ssp uwtable define <8 x i32> @five_or_and_xor(<8 x float> %x) { -; X32-LABEL: five_or_and_xor: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm3, %ymm0 -; X32-NEXT: vxorps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X32-NEXT: retl +; X86-LABEL: five_or_and_xor: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vxorps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vandps %ymm0, %ymm3, %ymm0 +; X86-NEXT: vxorps %ymm0, %ymm2, %ymm0 +; X86-NEXT: vorps %ymm1, %ymm0, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: five_or_and_xor: ; X64: ## %bb.0: ## %entry @@ -959,23 +1316,23 @@ define <8 x i32> @five_or_and_xor(<8 x float> %x) { ; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: five_or_and_xor: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0 -; X32-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: five_or_and_xor: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vxorps %ymm3, %ymm2, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0 +; X86-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0 +; X86-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_or_and_xor: ; X64-AVX2: ## %bb.0: ## %entry @@ -994,6 +1351,36 @@ define <8 x i32> @five_or_and_xor(<8 x float> %x) { ; X64-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: five_or_and_xor: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k2 +; X86-AVX512-NEXT: kxorw %k2, %k1, %k1 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2 {%k2} +; X86-AVX512-NEXT: kxorw %k2, %k1, %k1 +; X86-AVX512-NEXT: korw %k0, %k1, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: five_or_and_xor: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k2 +; X64-AVX512-NEXT: kxorw %k2, %k1, %k1 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2 {%k2} +; X64-AVX512-NEXT: kxorw %k2, %k1, %k1 +; X64-AVX512-NEXT: korw %k0, %k1, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x, @@ -1008,22 +1395,22 @@ entry: ret <8 x i32> %or } define <8 x i32> @six_or_and_xor(<8 x float> %x) { -; X32-LABEL: six_or_and_xor: -; X32: ## %bb.0: ## %entry -; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 -; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 -; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 -; X32-NEXT: vxorps %ymm1, %ymm3, %ymm1 -; X32-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 -; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-NEXT: retl +; X86-LABEL: six_or_and_xor: +; X86: ## %bb.0: ## %entry +; X86-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2 +; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 +; X86-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3 +; X86-NEXT: vxorps %ymm1, %ymm3, %ymm1 +; X86-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-NEXT: retl ; ; X64-LABEL: six_or_and_xor: ; X64: ## %bb.0: ## %entry @@ -1042,26 +1429,26 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) { ; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; -; X32-AVX2-LABEL: six_or_and_xor: -; X32-AVX2: ## %bb.0: ## %entry -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1 -; X32-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 -; X32-AVX2-NEXT: retl +; X86-AVX2-LABEL: six_or_and_xor: +; X86-AVX2: ## %bb.0: ## %entry +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; X86-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X86-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1 +; X86-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1] +; X86-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 +; X86-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: six_or_and_xor: ; X64-AVX2: ## %bb.0: ## %entry @@ -1083,6 +1470,38 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) { ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 ; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq +; +; X86-AVX512-LABEL: six_or_and_xor: +; X86-AVX512: ## %bb.0: ## %entry +; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0 +; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1} +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2 +; X86-AVX512-NEXT: kxorw %k0, %k2, %k0 +; X86-AVX512-NEXT: kxorw %k1, %k0, %k0 +; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 +; X86-AVX512-NEXT: korw %k1, %k0, %k1 +; X86-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X86-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X86-AVX512-NEXT: retl +; +; X64-AVX512-LABEL: six_or_and_xor: +; X64-AVX512: ## %bb.0: ## %entry +; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 +; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1 +; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2 +; X64-AVX512-NEXT: kxorw %k0, %k2, %k0 +; X64-AVX512-NEXT: kxorw %k1, %k0, %k0 +; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 +; X64-AVX512-NEXT: korw %k1, %k0, %k1 +; X64-AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; X64-AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; X64-AVX512-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, %cmp1 = fcmp olt <8 x float> %x,