diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-blend.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-blend.ll
index 5267c408b73a71..814a4ee163aed1 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-blend.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-blend.ll
@@ -59,3 +59,106 @@ entry:
   %t4 = shufflevector <2 x i32> %t2, <2 x i32> %t3, <2 x i32> <i32 0, i32 3>
   ret <2 x i32> %t4
 }
+
+define <64 x i8> @addb_selectw_64xi8(<64 x i8> %t0, <64 x i8> %t1) {
+; CHECK-LABEL: addb_selectw_64xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    kmovd %eax, %k1
+; CHECK-NEXT:    vmovdqu16 %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %t2 = add nsw <64 x i8> %t0, %t1
+  %t3 = sub nsw <64 x i8> %t0, %t1
+  %t4 = shufflevector <64 x i8> %t2, <64 x i8> %t3, <64 x i32> <i32 64, i32 65, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+  ret <64 x i8> %t4
+}
+
+define <32 x i8> @addb_selectw_32xi8(<32 x i8> %t0, <32 x i8> %t1) {
+; CHECK-LABEL: addb_selectw_32xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm2
+; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; CHECK-NEXT:    retq
+  %t2 = add nsw <32 x i8> %t0, %t1
+  %t3 = sub nsw <32 x i8> %t0, %t1
+  %t4 = shufflevector <32 x i8> %t2, <32 x i8> %t3, <32 x i32> <i32 32, i32 33, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %t4
+}
+
+define <16 x i8> @addb_selectw_16xi8(<16 x i8> %t0, <16 x i8> %t1) {
+; CHECK-LABEL: addb_selectw_16xi8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
+; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; CHECK-NEXT:    retq
+  %t2 = add nsw <16 x i8> %t0, %t1
+  %t3 = sub nsw <16 x i8> %t0, %t1
+  %t4 = shufflevector <16 x i8> %t2, <16 x i8> %t3, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %t4
+}
+
+define <32 x i16> @addw_selectd_32xi16(<32 x i16> %t0, <32 x i16> %t1) {
+; CHECK-LABEL: addw_selectd_32xi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    movw $1, %ax
+; CHECK-NEXT:    kmovd %eax, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %t2 = add nsw <32 x i16> %t0, %t1
+  %t3 = sub nsw <32 x i16> %t0, %t1
+  %t4 = shufflevector <32 x i16> %t2, <32 x i16> %t3, <32 x i32> <i32 32, i32 33, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i16> %t4
+}
+
+define <16 x i16> @addw_selectd_16xi16(<16 x i16> %t0, <16 x i16> %t1) {
+; CHECK-LABEL: addw_selectd_16xi16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm2
+; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
+; CHECK-NEXT:    retq
+  %t2 = add nsw <16 x i16> %t0, %t1
+  %t3 = sub nsw <16 x i16> %t0, %t1
+  %t4 = shufflevector <16 x i16> %t2, <16 x i16> %t3, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %t4
+}
+
+define <16 x i32> @addd_selectq_16xi32(<16 x i32> %t0, <16 x i32> %t1) {
+; CHECK-LABEL: addd_selectq_16xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    kmovd %eax, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %t2 = add nsw <16 x i32> %t0, %t1
+  %t3 = sub nsw <16 x i32> %t0, %t1
+  %t4 = shufflevector <16 x i32> %t2, <16 x i32> %t3, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  ret <16 x i32> %t4
+}
+
+define <8 x i32> @addd_selectq_8xi32(<8 x i32> %t0, <8 x i32> %t1) {
+; CHECK-LABEL: addd_selectq_8xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
+; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
+; CHECK-NEXT:    retq
+  %t2 = add nsw <8 x i32> %t0, %t1
+  %t3 = sub nsw <8 x i32> %t0, %t1
+  %t4 = shufflevector <8 x i32> %t2, <8 x i32> %t3, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+  ret <8 x i32> %t4
+}