@@ -3822,145 +3822,6 @@ define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
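
; The test below exercises all three masking modes of the intrinsic: an
; all-ones mask (i16 -1, no masking), merge-masking into %x2, and
; zero-masking (zeroinitializer passthru). Summing the results keeps every
; call live, so each one lowers to its own vshuff32x4 in the CHECK lines.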
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}
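
; The _load variant reads the 128-bit source from memory; per the CHECK
; lines, the vmovaps load stays a separate instruction rather than being
; folded into the masked shuffle.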
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %xmm1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
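
; Same three-mask pattern for the 64x4 broadcast; the mask is i8 because the
; result has eight double lanes, and the lowering uses vshuff64x2.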
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %ymm1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
  %x0 = load <4 x double>, <4 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
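
; Integer flavor of the same pattern: the broadcast lowers to vshufi32x4 and
; the result sums become vpaddd instead of vaddps.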
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa (%rdi), %xmm1
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
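
; 64-bit integer variant: an i8 mask over eight i64 lanes, lowered to
; vshufi64x2 with vpaddq for the sums.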
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa (%rdi), %ymm1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,0,1,2,3]
; CHECK-NEXT: retq
  %x0 = load <4 x i64>, <4 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {