Expand Up
@@ -13,25 +13,25 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1,
ret <8 x i32 > %1
}
define <8 x i32 >@test_int_x86_avx512_mask_vpdpbusd_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
define { <8 x i32 >, < 8 x i32 > } @test_int_x86_avx512_mask_vpdpbusd_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18]
; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x00]
; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x1f]
; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x07]
; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32 >, <8 x i32 >* %x2p
%1 = call <8 x i32 > @llvm.x86.avx512.vpdpbusd.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x2 )
Expand All
@@ -40,8 +40,9 @@ define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32>
%4 = call <8 x i32 > @llvm.x86.avx512.vpdpbusd.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x4 )
%5 = bitcast i8 %x3 to <8 x i1 >
%6 = select <8 x i1 > %5 , <8 x i32 > %4 , <8 x i32 > zeroinitializer
%res3 = add <8 x i32 > %3 , %6
ret <8 x i32 > %res3
%res1 = insertvalue { <8 x i32 >, <8 x i32 > } poison, <8 x i32 > %3 , 0
%res2 = insertvalue { <8 x i32 >, <8 x i32 > } %res1 , <8 x i32 > %6 , 1
ret { <8 x i32 >, <8 x i32 > } %res2
}
declare <4 x i32 > @llvm.x86.avx512.vpdpbusd.128 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
Expand All
@@ -55,25 +56,25 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1,
ret <4 x i32 > %1
}
define <4 x i32 >@test_int_x86_avx512_mask_vpdpbusd_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
define { <4 x i32 >, < 4 x i32 > } @test_int_x86_avx512_mask_vpdpbusd_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18]
; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x00]
; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x1f]
; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x07]
; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32 >, <4 x i32 >* %x2p
%1 = call <4 x i32 > @llvm.x86.avx512.vpdpbusd.128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 > %x2 )
Expand All
@@ -84,8 +85,9 @@ define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32>
%5 = bitcast i8 %x3 to <8 x i1 >
%extract1 = shufflevector <8 x i1 > %5 , <8 x i1 > %5 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
%6 = select <4 x i1 > %extract1 , <4 x i32 > %4 , <4 x i32 > zeroinitializer
%res3 = add <4 x i32 > %3 , %6
ret <4 x i32 > %res3
%res1 = insertvalue { <4 x i32 >, <4 x i32 > } poison, <4 x i32 > %3 , 0
%res2 = insertvalue { <4 x i32 >, <4 x i32 > } %res1 , <4 x i32 > %6 , 1
ret { <4 x i32 >, <4 x i32 > } %res2
}
declare <8 x i32 > @llvm.x86.avx512.vpdpbusds.256 (<8 x i32 >, <8 x i32 >, <8 x i32 >)
Expand All
@@ -99,25 +101,25 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1,
ret <8 x i32 > %1
}
define <8 x i32 >@test_int_x86_avx512_mask_vpdpbusds_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
define { <8 x i32 >, < 8 x i32 > } @test_int_x86_avx512_mask_vpdpbusds_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x18]
; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x00]
; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x1f]
; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x07]
; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32 >, <8 x i32 >* %x2p
%1 = call <8 x i32 > @llvm.x86.avx512.vpdpbusds.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x2 )
Expand All
@@ -126,8 +128,9 @@ define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32>
%4 = call <8 x i32 > @llvm.x86.avx512.vpdpbusds.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x4 )
%5 = bitcast i8 %x3 to <8 x i1 >
%6 = select <8 x i1 > %5 , <8 x i32 > %4 , <8 x i32 > zeroinitializer
%res3 = add <8 x i32 > %3 , %6
ret <8 x i32 > %res3
%res1 = insertvalue { <8 x i32 >, <8 x i32 > } poison, <8 x i32 > %3 , 0
%res2 = insertvalue { <8 x i32 >, <8 x i32 > } %res1 , <8 x i32 > %6 , 1
ret { <8 x i32 >, <8 x i32 > } %res2
}
declare <4 x i32 > @llvm.x86.avx512.vpdpbusds.128 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
Expand All
@@ -141,25 +144,25 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1,
ret <4 x i32 > %1
}
define <4 x i32 >@test_int_x86_avx512_mask_vpdpbusds_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
define { <4 x i32 >, < 4 x i32 > } @test_int_x86_avx512_mask_vpdpbusds_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18]
; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x00]
; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x1f]
; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x07]
; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32 >, <4 x i32 >* %x2p
%1 = call <4 x i32 > @llvm.x86.avx512.vpdpbusds.128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 > %x2 )
Expand All
@@ -170,8 +173,9 @@ define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32>
%5 = bitcast i8 %x3 to <8 x i1 >
%extract1 = shufflevector <8 x i1 > %5 , <8 x i1 > %5 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
%6 = select <4 x i1 > %extract1 , <4 x i32 > %4 , <4 x i32 > zeroinitializer
%res3 = add <4 x i32 > %3 , %6
ret <4 x i32 > %res3
%res1 = insertvalue { <4 x i32 >, <4 x i32 > } poison, <4 x i32 > %3 , 0
%res2 = insertvalue { <4 x i32 >, <4 x i32 > } %res1 , <4 x i32 > %6 , 1
ret { <4 x i32 >, <4 x i32 > } %res2
}
declare <8 x i32 > @llvm.x86.avx512.vpdpwssd.256 (<8 x i32 >, <8 x i32 >, <8 x i32 >)
Expand All
@@ -185,25 +189,25 @@ define <8 x i32>@test_int_x86_avx512_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1,
ret <8 x i32 > %1
}
define <8 x i32 >@test_int_x86_avx512_mask_vpdpwssd_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
define { <8 x i32 >, < 8 x i32 > } @test_int_x86_avx512_mask_vpdpwssd_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18]
; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x00]
; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x1f]
; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x07]
; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32 >, <8 x i32 >* %x2p
%1 = call <8 x i32 > @llvm.x86.avx512.vpdpwssd.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x2 )
Expand All
@@ -212,8 +216,9 @@ define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32>
%4 = call <8 x i32 > @llvm.x86.avx512.vpdpwssd.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x4 )
%5 = bitcast i8 %x3 to <8 x i1 >
%6 = select <8 x i1 > %5 , <8 x i32 > %4 , <8 x i32 > zeroinitializer
%res3 = add <8 x i32 > %3 , %6
ret <8 x i32 > %res3
%res1 = insertvalue { <8 x i32 >, <8 x i32 > } poison, <8 x i32 > %3 , 0
%res2 = insertvalue { <8 x i32 >, <8 x i32 > } %res1 , <8 x i32 > %6 , 1
ret { <8 x i32 >, <8 x i32 > } %res2
}
declare <4 x i32 > @llvm.x86.avx512.vpdpwssd.128 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
Expand All
@@ -227,25 +232,25 @@ define <4 x i32>@test_int_x86_avx512_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1,
ret <4 x i32 > %1
}
define <4 x i32 >@test_int_x86_avx512_mask_vpdpwssd_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
define { <4 x i32 >, < 4 x i32 > } @test_int_x86_avx512_mask_vpdpwssd_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18]
; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x00]
; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x1f]
; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x07]
; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32 >, <4 x i32 >* %x2p
%1 = call <4 x i32 > @llvm.x86.avx512.vpdpwssd.128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 > %x2 )
Expand All
@@ -256,8 +261,9 @@ define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32>
%5 = bitcast i8 %x3 to <8 x i1 >
%extract1 = shufflevector <8 x i1 > %5 , <8 x i1 > %5 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
%6 = select <4 x i1 > %extract1 , <4 x i32 > %4 , <4 x i32 > zeroinitializer
%res3 = add <4 x i32 > %3 , %6
ret <4 x i32 > %res3
%res1 = insertvalue { <4 x i32 >, <4 x i32 > } poison, <4 x i32 > %3 , 0
%res2 = insertvalue { <4 x i32 >, <4 x i32 > } %res1 , <4 x i32 > %6 , 1
ret { <4 x i32 >, <4 x i32 > } %res2
}
declare <8 x i32 > @llvm.x86.avx512.vpdpwssds.256 (<8 x i32 >, <8 x i32 >, <8 x i32 >)
Expand All
@@ -271,25 +277,25 @@ define <8 x i32>@test_int_x86_avx512_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1,
ret <8 x i32 > %1
}
define <8 x i32 >@test_int_x86_avx512_mask_vpdpwssds_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
define { <8 x i32 >, < 8 x i32 > } @test_int_x86_avx512_mask_vpdpwssds_256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 >* %x2p , <8 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x18]
; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x00]
; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda]
; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x1f]
; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x07]
; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda]
; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <8 x i32 >, <8 x i32 >* %x2p
%1 = call <8 x i32 > @llvm.x86.avx512.vpdpwssds.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x2 )
Expand All
@@ -298,8 +304,9 @@ define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32>
%4 = call <8 x i32 > @llvm.x86.avx512.vpdpwssds.256 (<8 x i32 > %x0 , <8 x i32 > %x1 , <8 x i32 > %x4 )
%5 = bitcast i8 %x3 to <8 x i1 >
%6 = select <8 x i1 > %5 , <8 x i32 > %4 , <8 x i32 > zeroinitializer
%res3 = add <8 x i32 > %3 , %6
ret <8 x i32 > %res3
%res1 = insertvalue { <8 x i32 >, <8 x i32 > } poison, <8 x i32 > %3 , 0
%res2 = insertvalue { <8 x i32 >, <8 x i32 > } %res1 , <8 x i32 > %6 , 1
ret { <8 x i32 >, <8 x i32 > } %res2
}
declare <4 x i32 > @llvm.x86.avx512.vpdpwssds.128 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
Expand All
@@ -320,25 +327,25 @@ define <4 x i32>@test_int_x86_avx512_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1,
ret <4 x i32 > %1
}
define <4 x i32 >@test_int_x86_avx512_mask_vpdpwssds_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
define { <4 x i32 >, < 4 x i32 > } @test_int_x86_avx512_mask_vpdpwssds_128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 >* %x2p , <4 x i32 > %x4 , i8 %x3 ) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18]
; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x00]
; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda]
; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x1f]
; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x07]
; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda]
; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
%x2 = load <4 x i32 >, <4 x i32 >* %x2p
%1 = call <4 x i32 > @llvm.x86.avx512.vpdpwssds.128 (<4 x i32 > %x0 , <4 x i32 > %x1 , <4 x i32 > %x2 )
Expand All
@@ -349,6 +356,7 @@ define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32>
%5 = bitcast i8 %x3 to <8 x i1 >
%extract1 = shufflevector <8 x i1 > %5 , <8 x i1 > %5 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
%6 = select <4 x i1 > %extract1 , <4 x i32 > %4 , <4 x i32 > zeroinitializer
%res3 = add <4 x i32 > %3 , %6
ret <4 x i32 > %res3
%res1 = insertvalue { <4 x i32 >, <4 x i32 > } poison, <4 x i32 > %3 , 0
%res2 = insertvalue { <4 x i32 >, <4 x i32 > } %res1 , <4 x i32 > %6 , 1
ret { <4 x i32 >, <4 x i32 > } %res2
}