diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index 84858e56f8760..900e36a1e5168 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -46,61 +46,60 @@ define i64 @test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) nounwind { declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64) -define <64 x i8> @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind { +define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; X86: # %bb.0: ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04] -; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1] -; X86-NEXT: vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1] -; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] -; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x5c,0x24,0x04] +; X86-NEXT: vpblendmb %zmm3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0xcb] +; X86-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd3] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf] +; X64-NEXT: vmovdqa64 %zmm0, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc8] +; X64-NEXT: vpbroadcastb %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc7] ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7] +; X64-NEXT: vpbroadcastb %edi, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xcf] ; X64-NEXT: vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7] -; X64-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] -; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask) %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask) - %res3 = add <64 x i8> %res, %res1 - %res4 = add <64 x i8> %res2, %res3 - ret <64 x i8> %res4 - } + %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0 + %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1 + %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2 + ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5 +} declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) nounwind { + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x4c,0x24,0x02] +; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x5c,0x24,0x02] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc1] -; X86-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd1] -; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpblendmw %zmm3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0xcb] +; X86-NEXT: vmovdqu16 %zmm3, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd3] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf] +; X64-NEXT: vmovdqa64 %zmm0, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc8] +; X64-NEXT: vpbroadcastw %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7] +; X64-NEXT: vpbroadcastw %edi, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xcf] ; X64-NEXT: vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7] -; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) - %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res2, %res3 - ret <32 x i16> %res4 +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64) @@ -155,77 +154,78 @@ define void @test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00] -; X86-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09] -; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] +; X86-NEXT: vpblendmw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0x08] +; X86-NEXT: vmovdqu16 (%ecx), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512: ; X64: # %bb.0: ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06] -; X64-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f] -; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] +; X64-NEXT: vpblendmw (%rsi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0x0e] +; X64-NEXT: vmovdqu16 (%rdi), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] - %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1) - %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) - %res2 = add <32 x i16> %res, %res1 - ret <32 x i16> %res2 + %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64) -define <64 x i8> @test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) nounwind { +define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] -; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00] -; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09] -; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] +; X86-NEXT: vpblendmb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0x08] +; X86-NEXT: vmovdqu8 (%ecx), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512: ; X64: # %bb.0: ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] ; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x06] -; X64-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f] -; X64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] +; X64-NEXT: vpblendmb (%rsi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0x0e] +; X64-NEXT: vmovdqu8 (%rdi), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] - %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) - %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) - %res2 = add <64 x i8> %res, %res1 - ret <64 x i8> %res2 + %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res, i64 %mask) + %res2 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) + %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0 + %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1 + %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2 + ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5 } declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32) -define <8 x i64> @test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) nounwind { +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) nounwind { ; CHECK-LABEL: test_int_x86_avx512_psll_dq_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08] -; CHECK-NEXT: # zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55] -; CHECK-NEXT: vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04] -; CHECK-NEXT: # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59] -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; CHECK-NEXT: vpslldq $8, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x73,0xf8,0x08] +; CHECK-NEXT: # zmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55] +; CHECK-NEXT: vpslldq $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x04] +; CHECK-NEXT: # zmm1 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59] +; CHECK-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> %res2 + %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 } define <8 x i64> @test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) nounwind { @@ -248,19 +248,20 @@ define <8 x i64> @test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) nounwind declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32) -define <8 x i64> @test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) nounwind { +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) nounwind { ; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08] -; CHECK-NEXT: # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04] -; CHECK-NEXT: # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x73,0xd8,0x08] +; CHECK-NEXT: # zmm2 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x04] +; CHECK-NEXT: # zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero +; CHECK-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> %res2 + %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 } define <8 x i64> @test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) nounwind { @@ -1089,32 +1090,33 @@ define <32 x i16> @test_int_x86_avx512_maskz_psrl_w_512(<32 x i16> %x0, <8 x i16 declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] -; X86-NEXT: vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04] -; X86-NEXT: vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpsrlw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xd0,0x03] +; X86-NEXT: vpsrlw $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xd0,0x04] +; X86-NEXT: vpsrlw $5, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xd0,0x05] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] -; X64-NEXT: vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04] -; X64-NEXT: vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X64-NEXT: vpsrlw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xd0,0x03] +; X64-NEXT: vpsrlw $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xd0,0x04] +; X64-NEXT: vpsrlw $5, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xd0,0x05] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 5, <32 x i16> zeroinitializer, i32 %x3) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) @@ -1164,32 +1166,33 @@ define <32 x i16> @test_int_x86_avx512_maskz_psra_w_512(<32 x i16> %x0, <8 x i16 declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] -; X86-NEXT: vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04] -; X86-NEXT: vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpsraw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xe0,0x03] +; X86-NEXT: vpsraw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xe0,0x04] +; X86-NEXT: vpsraw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x05] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] -; X64-NEXT: vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04] -; X64-NEXT: vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X64-NEXT: vpsraw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xe0,0x03] +; X64-NEXT: vpsraw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xe0,0x04] +; X64-NEXT: vpsraw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x05] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) @@ -1239,32 +1242,33 @@ define <32 x i16> @test_int_x86_avx512_maskz_psll_w_512(<32 x i16> %x0, <8 x i16 declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] -; X86-NEXT: vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04] -; X86-NEXT: vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpsllw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xf0,0x03] +; X86-NEXT: vpsllw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xf0,0x04] +; X86-NEXT: vpsllw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x05] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04] -; X64-NEXT: vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X64-NEXT: vpsllw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xf0,0x03] +; X64-NEXT: vpsllw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x05] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) @@ -3003,32 +3007,35 @@ define <32 x i16> @test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <3 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3, <32 x i16> zeroinitializer, i32 %x4) %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4, <32 x i16> %x3, i32 -1) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind { diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 97d558cdb8984..ed2428726aa74 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1238,25 +1238,27 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32) -define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2) %2 = bitcast i32 %x4 to <32 x i1> @@ -1265,24 +1267,26 @@ define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %5 = bitcast i32 %x4 to <32 x i1> %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4) - %res3 = add <32 x i16> %3, %6 - %res4 = add <32 x i16> %res3, %7 - ret <32 x i16> %res4 + %res1 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0 + %res2 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res1, <32 x i16> %6, 1 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res2, <32 x i16> %7, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res3 } declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) -define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ ; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9] -; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2] -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xd9] +; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xca] +; CHECK-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) + %res0 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> %res2 + %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 } declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index f9b50b0e2b298..64d7de4b99d61 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -4,127 +4,123 @@ declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16) -define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x5c,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0xc1] -; X86-NEXT: vmovdqu8 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd1] -; X86-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpblendmb %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0xcb] +; X86-NEXT: vmovdqu8 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd3] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf] +; X64-NEXT: vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8] +; X64-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] +; X64-NEXT: vpbroadcastb %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xcf] ; X64-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7] -; X64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask) - %res3 = add <16 x i8> %res, %res1 - %res4 = add <16 x i8> %res2, %res3 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x5c,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1] -; X86-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd1] -; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpblendmw %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0xcb] +; X86-NEXT: vmovdqu16 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd3] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf] +; X64-NEXT: vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8] +; X64-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] +; X64-NEXT: vpbroadcastw %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xcf] ; X64-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7] -; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask) %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res2, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32) -define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { +define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x5c,0x24,0x04] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0xc1] -; X86-NEXT: vmovdqu8 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd1] -; X86-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] -; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] +; X86-NEXT: vpblendmb %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0xcb] +; X86-NEXT: vmovdqu8 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd3] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf] +; X64-NEXT: vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8] +; X64-NEXT: vpbroadcastb %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] +; X64-NEXT: vpbroadcastb %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xcf] ; X64-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7] -; X64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] -; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) + %res0 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask) - %res3 = add <32 x i8> %res, %res1 - %res4 = add <32 x i8> %res2, %res3 - ret <32 x i8> %res4 + %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0 + %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1 + %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2 + ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x5c,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xc1] -; X86-NEXT: vmovdqu16 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd1] -; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpblendmw %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0xcb] +; X86-NEXT: vmovdqu16 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd3] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf] +; X64-NEXT: vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8] +; X64-NEXT: vpbroadcastw %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] +; X64-NEXT: vpbroadcastw %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xcf] ; X64-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7] -; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask) %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res2, %res3 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) @@ -498,7 +494,7 @@ define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i1 declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8*, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] @@ -506,111 +502,111 @@ define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] ; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X86-NEXT: vmovdqu16 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x00] -; X86-NEXT: vmovdqu16 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x09] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpblendmw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x08] +; X86-NEXT: vmovdqu16 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu16 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06] -; X64-NEXT: vmovdqu16 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpblendmw (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x0e] +; X64-NEXT: vmovdqu16 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1) - %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask) - %res2 = add <8 x i16> %res, %res1 - ret <8 x i16> %res2 + %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask) + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8*, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x00] -; X86-NEXT: vmovdqu16 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x09] -; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] +; X86-NEXT: vpblendmw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x08] +; X86-NEXT: vmovdqu16 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu16 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06] -; X64-NEXT: vmovdqu16 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f] -; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] +; X64-NEXT: vpblendmw (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x0e] +; X64-NEXT: vmovdqu16 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1) - %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask) - %res2 = add <16 x i16> %res, %res1 - ret <16 x i16> %res2 + %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask) + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8*, <16 x i8>, i16) -define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu8 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x00] -; X86-NEXT: vmovdqu8 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x09] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpblendmb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x08] +; X86-NEXT: vmovdqu8 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu8 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06] -; X64-NEXT: vmovdqu8 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpblendmb (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x0e] +; X64-NEXT: vmovdqu8 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1) - %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask) - %res2 = add <16 x i8> %res, %res1 - ret <16 x i8> %res2 + %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask) + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8*, <32 x i8>, i32) -define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { +define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu8 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x00] -; X86-NEXT: vmovdqu8 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x09] -; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] +; X86-NEXT: vpblendmb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x08] +; X86-NEXT: vmovdqu8 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu8 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06] -; X64-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f] -; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] +; X64-NEXT: vpblendmb (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x0e] +; X64-NEXT: vmovdqu8 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1) - %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask) - %res2 = add <32 x i8> %res, %res1 - ret <32 x i8> %res2 + %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0 + %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1 + %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2 + ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5 } declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16) @@ -3151,185 +3147,191 @@ define <16 x i16>@test_int_x86_avx512_maskz_psll_w_256(<16 x i16> %x0, <8 x i16> declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] -; X86-NEXT: vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04] -; X86-NEXT: vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03] +; X86-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04] +; X86-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] -; X64-NEXT: vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04] -; X64-NEXT: vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03] +; X64-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04] +; X64-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1) %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res2, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] -; X86-NEXT: vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04] -; X86-NEXT: vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03] +; X86-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04] +; X86-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] -; X64-NEXT: vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04] -; X64-NEXT: vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X64-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03] +; X64-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04] +; X64-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) + %res0 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1) %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] -; X86-NEXT: vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04] -; X86-NEXT: vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03] +; X86-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04] +; X86-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] -; X64-NEXT: vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04] -; X64-NEXT: vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03] +; X64-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04] +; X64-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3) %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] -; X86-NEXT: vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04] -; X86-NEXT: vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03] +; X86-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04] +; X86-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] -; X64-NEXT: vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04] -; X64-NEXT: vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X64-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03] +; X64-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04] +; X64-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3) %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] -; X86-NEXT: vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04] -; X86-NEXT: vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03] +; X86-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04] +; X86-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04] -; X64-NEXT: vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03] +; X64-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3) %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] -; X86-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] -; X86-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03] +; X86-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04] +; X86-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] -; X64-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X64-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03] +; X64-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3) %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) @@ -6925,63 +6927,69 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4) %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res2, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4) %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 7742d96247cf3..558985a363bb1 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1512,33 +1512,32 @@ define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] +; X86-NEXT: vpmovwb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc3] ; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] -; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X86-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] +; X64-NEXT: vpmovwb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc3] ; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] -; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X64-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) @@ -1566,33 +1565,32 @@ define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] +; X86-NEXT: vpmovswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc3] ; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] -; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X86-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] +; X64-NEXT: vpmovswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc3] ; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] -; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X64-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) @@ -1620,33 +1618,32 @@ define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] +; X86-NEXT: vpmovuswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc3] ; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] -; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X86-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] +; X64-NEXT: vpmovuswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc3] ; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] -; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X64-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) @@ -1990,26 +1987,28 @@ define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32) -define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2) %2 = bitcast i8 %x4 to <8 x i1> @@ -2018,32 +2017,35 @@ define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %5 = bitcast i8 %x4 to <8 x i1> %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4) - %res3 = add <8 x i16> %3, %6 - %res4 = add <8 x i16> %7, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %6, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %7, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32) -define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2) %2 = bitcast i16 %x4 to <16 x i1> @@ -2052,9 +2054,10 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %5 = bitcast i16 %x4 to <16 x i1> %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4) - %res3 = add <16 x i16> %3, %6 - %res4 = add <16 x i16> %res3, %7 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %6, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %7, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)