Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4456,8 +4456,8 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
bool AllowAVX512 = true) {
assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
unsigned NumSubs = 1;
if ((CheckBWI && Subtarget.useBWIRegs()) ||
(!CheckBWI && AllowAVX512 && Subtarget.useAVX512Regs())) {
if (AllowAVX512 && ((CheckBWI && Subtarget.useBWIRegs()) ||
(!CheckBWI && Subtarget.useAVX512Regs()))) {
if (VT.getSizeInBits() > 512) {
NumSubs = VT.getSizeInBits() / 512;
assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
Expand Down Expand Up @@ -46197,7 +46197,7 @@ static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
SDValue Zero = DAG.getConstant(0, DL, DpVT);

return SplitOpsAndApply(DAG, Subtarget, DL, DpVT, {Zero, DpOp0, DpOp1},
DpBuilder, false);
DpBuilder, /*CheckBWI=*/false, Subtarget.hasVNNI());
}

// Create a PSADBW given two sources representable as zexts of vXi8.
Expand Down
202 changes: 119 additions & 83 deletions llvm/test/CodeGen/X86/dpbusd.ll
Original file line number Diff line number Diff line change
@@ -1,40 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=AVXVNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=AVX512,AVX512VNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VLVNNI

define i32 @no_dpbusd(ptr%a, ptr%b, i32 %c, i32 %n) {
; AVXVNNI-LABEL: no_dpbusd:
; AVXVNNI: # %bb.0: # %entry
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vmovd %xmm0, %eax
; AVXVNNI-NEXT: addl %edx, %eax
; AVXVNNI-NEXT: vzeroupper
; AVXVNNI-NEXT: retq
;
; AVX512-LABEL: no_dpbusd:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: addl %edx, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; CHECK-LABEL: no_dpbusd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; CHECK-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
%0 = load <16 x i8>, ptr %a, align 16
%1 = zext <16 x i8> %0 to <16 x i32>
Expand Down Expand Up @@ -99,25 +84,44 @@ entry:
}

define i32 @mul_zext(ptr%a, ptr%b, i32 %c, i32 %n) {
; AVXVNNI-LABEL: mul_zext:
; AVXVNNI: # %bb.0: # %entry
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-NEXT: vpmovsxbw (%rsi), %ymm1
; AVXVNNI-NEXT: vpmullw %ymm0, %ymm1, %ymm0
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVXVNNI-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVXVNNI-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vmovd %xmm0, %eax
; AVXVNNI-NEXT: addl %edx, %eax
; AVXVNNI-NEXT: vzeroupper
; AVXVNNI-NEXT: retq
; AVXVNNI-AVX-LABEL: mul_zext:
; AVXVNNI-AVX: # %bb.0: # %entry
; AVXVNNI-AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-AVX-NEXT: vpmovsxbw (%rsi), %ymm1
; AVXVNNI-AVX-NEXT: vpmullw %ymm0, %ymm1, %ymm0
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-AVX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVXVNNI-AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVXVNNI-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX-NEXT: vmovd %xmm0, %eax
; AVXVNNI-AVX-NEXT: addl %edx, %eax
; AVXVNNI-AVX-NEXT: vzeroupper
; AVXVNNI-AVX-NEXT: retq
;
; AVXVNNI-AVX512-LABEL: mul_zext:
; AVXVNNI-AVX512: # %bb.0: # %entry
; AVXVNNI-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-AVX512-NEXT: vpmovsxbw (%rsi), %ymm1
; AVXVNNI-AVX512-NEXT: vpmullw %ymm0, %ymm1, %ymm0
; AVXVNNI-AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVXVNNI-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVXVNNI-AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVXVNNI-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX512-NEXT: vmovd %xmm0, %eax
; AVXVNNI-AVX512-NEXT: addl %edx, %eax
; AVXVNNI-AVX512-NEXT: vzeroupper
; AVXVNNI-AVX512-NEXT: retq
;
; AVX512-LABEL: mul_zext:
; AVX512: # %bb.0: # %entry
Expand Down Expand Up @@ -153,25 +157,44 @@ entry:
}

define i32 @mul_sext(ptr%a, ptr%b, i32 %c, i32 %n) {
; AVXVNNI-LABEL: mul_sext:
; AVXVNNI: # %bb.0: # %entry
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-NEXT: vpmovsxbw (%rsi), %ymm1
; AVXVNNI-NEXT: vpmullw %ymm0, %ymm1, %ymm0
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-NEXT: vpmovsxwd %xmm1, %ymm1
; AVXVNNI-NEXT: vpmovsxwd %xmm0, %ymm0
; AVXVNNI-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-NEXT: vmovd %xmm0, %eax
; AVXVNNI-NEXT: addl %edx, %eax
; AVXVNNI-NEXT: vzeroupper
; AVXVNNI-NEXT: retq
; AVXVNNI-AVX-LABEL: mul_sext:
; AVXVNNI-AVX: # %bb.0: # %entry
; AVXVNNI-AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-AVX-NEXT: vpmovsxbw (%rsi), %ymm1
; AVXVNNI-AVX-NEXT: vpmullw %ymm0, %ymm1, %ymm0
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-AVX-NEXT: vpmovsxwd %xmm1, %ymm1
; AVXVNNI-AVX-NEXT: vpmovsxwd %xmm0, %ymm0
; AVXVNNI-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX-NEXT: vmovd %xmm0, %eax
; AVXVNNI-AVX-NEXT: addl %edx, %eax
; AVXVNNI-AVX-NEXT: vzeroupper
; AVXVNNI-AVX-NEXT: retq
;
; AVXVNNI-AVX512-LABEL: mul_sext:
; AVXVNNI-AVX512: # %bb.0: # %entry
; AVXVNNI-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVXVNNI-AVX512-NEXT: vpmovsxbw (%rsi), %ymm1
; AVXVNNI-AVX512-NEXT: vpmullw %ymm0, %ymm1, %ymm0
; AVXVNNI-AVX512-NEXT: vpmovsxwd %ymm0, %zmm0
; AVXVNNI-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVXVNNI-AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVXVNNI-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVXVNNI-AVX512-NEXT: vmovd %xmm0, %eax
; AVXVNNI-AVX512-NEXT: addl %edx, %eax
; AVXVNNI-AVX512-NEXT: vzeroupper
; AVXVNNI-AVX512-NEXT: retq
;
; AVX512-LABEL: mul_sext:
; AVX512: # %bb.0: # %entry
Expand Down Expand Up @@ -312,17 +335,30 @@ entry:
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) {
; AVXVNNI-LABEL: vpdpbusd_128:
; AVXVNNI: # %bb.0: # %entry
; AVXVNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVXVNNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVXVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVXVNNI-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
; AVXVNNI-NEXT: vmovd %xmm2, %eax
; AVXVNNI-NEXT: addl %edx, %eax
; AVXVNNI-NEXT: retq
; AVXVNNI-AVX-LABEL: vpdpbusd_128:
; AVXVNNI-AVX: # %bb.0: # %entry
; AVXVNNI-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVXVNNI-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVXVNNI-AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVXVNNI-AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVXVNNI-AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVXVNNI-AVX-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
; AVXVNNI-AVX-NEXT: vmovd %xmm2, %eax
; AVXVNNI-AVX-NEXT: addl %edx, %eax
; AVXVNNI-AVX-NEXT: retq
;
; AVXVNNI-AVX512-LABEL: vpdpbusd_128:
; AVXVNNI-AVX512: # %bb.0: # %entry
; AVXVNNI-AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVXVNNI-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVXVNNI-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVXVNNI-AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVXVNNI-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVXVNNI-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVXVNNI-AVX512-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
; AVXVNNI-AVX512-NEXT: vmovd %xmm2, %eax
; AVXVNNI-AVX512-NEXT: addl %edx, %eax
; AVXVNNI-AVX512-NEXT: retq
;
; AVX512VNNI-LABEL: vpdpbusd_128:
; AVX512VNNI: # %bb.0: # %entry
Expand Down
Loading