471 changes: 471 additions & 0 deletions clang/lib/Headers/avxvnniint8intrin.h

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions clang/lib/Headers/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@
#define bit_AVXIFMA 0x00800000

/* Features in %edx for leaf 7 sub-leaf 1 */
#define bit_AVXVNNIINT8 0x00000010
#define bit_PREFETCHI 0x00004000

/* Features in %eax for leaf 13 sub-leaf 1 */
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@
#include <gfniintrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVXVNNIINT8__)
#include <avxvnniint8intrin.h>
#endif

#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/attr-target-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
Expand Down
76 changes: 76 additions & 0 deletions clang/test/CodeGen/avxvnniint8-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64- -target-feature +avxvnniint8 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=i386- -target-feature +avxvnniint8 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>

// CHECK-LABEL: @test_mm_dpbssd_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbssd.128
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm_dpbssd_epi32 and
// returns its __m128i result, so the emitted IR contains the intrinsic call.
__m128i test_mm_dpbssd_epi32(__m128i __W, __m128i __A, __m128i __B) {
return _mm_dpbssd_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm_dpbssds_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbssds.128
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm_dpbssds_epi32 and
// returns its __m128i result, so the emitted IR contains the intrinsic call.
__m128i test_mm_dpbssds_epi32(__m128i __W, __m128i __A, __m128i __B) {
return _mm_dpbssds_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm_dpbsud_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbsud.128
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm_dpbsud_epi32 and
// returns its __m128i result, so the emitted IR contains the intrinsic call.
__m128i test_mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B) {
return _mm_dpbsud_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm_dpbsuds_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbsuds.128
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm_dpbsuds_epi32 and
// returns its __m128i result, so the emitted IR contains the intrinsic call.
__m128i test_mm_dpbsuds_epi32(__m128i __W, __m128i __A, __m128i __B) {
return _mm_dpbsuds_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm_dpbuud_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbuud.128
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm_dpbuud_epi32 and
// returns its __m128i result, so the emitted IR contains the intrinsic call.
__m128i test_mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B) {
return _mm_dpbuud_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm_dpbuuds_epi32(
// CHECK: call <4 x i32> @llvm.x86.avx2.vpdpbuuds.128
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm_dpbuuds_epi32 and
// returns its __m128i result, so the emitted IR contains the intrinsic call.
__m128i test_mm_dpbuuds_epi32(__m128i __W, __m128i __A, __m128i __B) {
return _mm_dpbuuds_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm256_dpbssd_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbssd.256
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm256_dpbssd_epi32
// and returns its __m256i result, so the emitted IR contains the intrinsic call.
__m256i test_mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B) {
return _mm256_dpbssd_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm256_dpbssds_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbssds.256
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm256_dpbssds_epi32
// and returns its __m256i result, so the emitted IR contains the intrinsic call.
__m256i test_mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B) {
return _mm256_dpbssds_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm256_dpbsud_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbsud.256
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm256_dpbsud_epi32
// and returns its __m256i result, so the emitted IR contains the intrinsic call.
__m256i test_mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B) {
return _mm256_dpbsud_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm256_dpbsuds_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbsuds.256
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm256_dpbsuds_epi32
// and returns its __m256i result, so the emitted IR contains the intrinsic call.
__m256i test_mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B) {
return _mm256_dpbsuds_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm256_dpbuud_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbuud.256
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm256_dpbuud_epi32
// and returns its __m256i result, so the emitted IR contains the intrinsic call.
__m256i test_mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B) {
return _mm256_dpbuud_epi32(__W, __A, __B);
}

// CHECK-LABEL: @test_mm256_dpbuuds_epi32(
// CHECK: call <8 x i32> @llvm.x86.avx2.vpdpbuuds.256
// Codegen fixture: forwards (__W, __A, __B) unchanged to _mm256_dpbuuds_epi32
// and returns its __m256i result, so the emitted IR contains the intrinsic call.
__m256i test_mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B) {
return _mm256_dpbuuds_epi32(__W, __A, __B);
}
5 changes: 5 additions & 0 deletions clang/test/Driver/x86-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,11 @@
// AVXIFMA: "-target-feature" "+avxifma"
// NO-AVXIFMA: "-target-feature" "-avxifma"

// RUN: %clang --target=i386 -mavxvnniint8 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX-VNNIINT8 %s
// RUN: %clang --target=i386 -mno-avxvnniint8 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX-VNNIINT8 %s
// AVX-VNNIINT8: "-target-feature" "+avxvnniint8"
// NO-AVX-VNNIINT8: "-target-feature" "-avxvnniint8"

// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
Expand Down
14 changes: 14 additions & 0 deletions clang/test/Preprocessor/x86_target_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,20 @@

// NO-RAOINT-NOT: #define __RAOINT__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnniint8 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNIINT8 %s

// AVXVNNIINT8: #define __AVX2__ 1
// AVXVNNIINT8: #define __AVXVNNIINT8__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mno-avxvnniint8 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOAVXVNNIINT8 %s

// NOAVXVNNIINT8-NOT: #define __AVXVNNIINT8__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnniint8 -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNIINT8NOAVX2 %s

// AVXVNNIINT8NOAVX2-NOT: #define __AVX2__ 1
// AVXVNNIINT8NOAVX2-NOT: #define __AVXVNNIINT8__ 1

// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s

// CRC32: #define __CRC32__ 1
Expand Down
1 change: 1 addition & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Changes to the X86 Backend
* Add support for the ``WRMSRNS`` instruction.
* Support ISA of ``AMX-FP16`` which contains ``tdpfp16ps`` instruction.
* Support ISA of ``CMPCCXADD``.
* Support ISA of ``AVX-VNNI-INT8``.

Changes to the OCaml bindings
-----------------------------
Expand Down
60 changes: 60 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1929,6 +1929,66 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
ClangBuiltin<"__builtin_ia32_vpdpwssds512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
// vpdpb{ss,su,uu}d[s] intrinsics in 128-bit and 256-bit widths.
// Every record has the same shape: one result vector and three operands of
// that same vector type (v4i32 for the _128 records, v8i32 for the _256
// records), marked IntrNoMem, and bound to the Clang builtin
// __builtin_ia32_<mnemonic><width>.
def int_x86_avx2_vpdpbssd_128
: ClangBuiltin<"__builtin_ia32_vpdpbssd128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbssd_256
: ClangBuiltin<"__builtin_ia32_vpdpbssd256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbssds_128
: ClangBuiltin<"__builtin_ia32_vpdpbssds128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbssds_256
: ClangBuiltin<"__builtin_ia32_vpdpbssds256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbsud_128
: ClangBuiltin<"__builtin_ia32_vpdpbsud128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbsud_256
: ClangBuiltin<"__builtin_ia32_vpdpbsud256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbsuds_128
: ClangBuiltin<"__builtin_ia32_vpdpbsuds128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbsuds_256
: ClangBuiltin<"__builtin_ia32_vpdpbsuds256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbuud_128
: ClangBuiltin<"__builtin_ia32_vpdpbuud128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbuud_256
: ClangBuiltin<"__builtin_ia32_vpdpbuud256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbuuds_128
: ClangBuiltin<"__builtin_ia32_vpdpbuuds128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_vpdpbuuds_256
: ClangBuiltin<"__builtin_ia32_vpdpbuuds256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/X86TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ X86_FEATURE (AMX_FP16, "amx-fp16")
X86_FEATURE (CMPCCXADD, "cmpccxadd")
X86_FEATURE (AVXVNNI, "avxvnni")
X86_FEATURE (AVXIFMA, "avxifma")
X86_FEATURE (AVXVNNIINT8, "avxvnniint8")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1812,6 +1812,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);

bool HasLeafD = MaxLevel >= 0xd &&
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};

constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
constexpr FeatureBitset ImpliedFeaturesCMPCCXADD = {};
constexpr FeatureBitset ImpliedFeaturesRAOINT = {};
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
"Support 16-bit floating point",
[FeatureBWI, FeatureVLX, FeatureDQI]>;
// Subtarget feature "avxvnniint8": sets HasAVXVNNIINT8 to true and lists
// FeatureAVX2 as its implied feature.
def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8",
"HasAVXVNNIINT8", "true",
"Enable AVX-VNNI-INT8",
[FeatureAVX2]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34221,6 +34221,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ENQCMD)
NODE_NAME_CASE(ENQCMDS)
NODE_NAME_CASE(VP2INTERSECT)
NODE_NAME_CASE(VPDPBSUD)
NODE_NAME_CASE(VPDPBSUDS)
NODE_NAME_CASE(VPDPBUUD)
NODE_NAME_CASE(VPDPBUUDS)
NODE_NAME_CASE(VPDPBSSD)
NODE_NAME_CASE(VPDPBSSDS)
NODE_NAME_CASE(AESENC128KL)
NODE_NAME_CASE(AESDEC128KL)
NODE_NAME_CASE(AESENC256KL)
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,13 @@ namespace llvm {
VFCMULCSH,
VFCMULCSH_RND,

VPDPBSUD,
VPDPBSUDS,
VPDPBUUD,
VPDPBUUDS,
VPDPBSSD,
VPDPBSSDS,

// Compress and expand.
COMPRESS,
EXPAND,
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86InstrFoldTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3995,6 +3995,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 },
{ X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 },
{ X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 },
{ X86::VPDPBSSDSYrr, X86::VPDPBSSDSYrm, 0 },
{ X86::VPDPBSSDSrr, X86::VPDPBSSDSrm, 0 },
{ X86::VPDPBSSDYrr, X86::VPDPBSSDYrm, 0 },
{ X86::VPDPBSSDrr, X86::VPDPBSSDrm, 0 },
{ X86::VPDPBSUDSYrr, X86::VPDPBSUDSYrm, 0 },
{ X86::VPDPBSUDSrr, X86::VPDPBSUDSrm, 0 },
{ X86::VPDPBSUDYrr, X86::VPDPBSUDYrm, 0 },
{ X86::VPDPBSUDrr, X86::VPDPBSUDrm, 0 },
{ X86::VPDPBUSDSYrr, X86::VPDPBUSDSYrm, 0 },
{ X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128m, 0 },
{ X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256m, 0 },
Expand All @@ -4005,6 +4013,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256m, 0 },
{ X86::VPDPBUSDZr, X86::VPDPBUSDZm, 0 },
{ X86::VPDPBUSDrr, X86::VPDPBUSDrm, 0 },
{ X86::VPDPBUUDSYrr, X86::VPDPBUUDSYrm, 0 },
{ X86::VPDPBUUDSrr, X86::VPDPBUUDSrm, 0 },
{ X86::VPDPBUUDYrr, X86::VPDPBUUDYrm, 0 },
{ X86::VPDPBUUDrr, X86::VPDPBUUDrm, 0 },
{ X86::VPDPWSSDSYrr, X86::VPDPWSSDSYrm, 0 },
{ X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128m, 0 },
{ X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256m, 0 },
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,13 @@ def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>
]>;

// SelectionDAG nodes for the X86ISD::VPDPB{SS,SU,UU}D[S] opcodes.
// All six share the SDTVnni type profile.
def X86vpdpbssd : SDNode<"X86ISD::VPDPBSSD", SDTVnni>;
def X86vpdpbssds : SDNode<"X86ISD::VPDPBSSDS", SDTVnni>;
def X86vpdpbsud : SDNode<"X86ISD::VPDPBSUD", SDTVnni>;
def X86vpdpbsuds : SDNode<"X86ISD::VPDPBSUDS", SDTVnni>;
def X86vpdpbuud : SDNode<"X86ISD::VPDPBUUD", SDTVnni>;
def X86vpdpbuuds : SDNode<"X86ISD::VPDPBUUDS", SDTVnni>;

//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2555,6 +2555,14 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPDPWSSDrr:
case X86::VPDPWSSDSYrr:
case X86::VPDPWSSDSrr:
case X86::VPDPBSSDSrr:
case X86::VPDPBSSDSYrr:
case X86::VPDPBSSDrr:
case X86::VPDPBSSDYrr:
case X86::VPDPBUUDSrr:
case X86::VPDPBUUDSYrr:
case X86::VPDPBUUDrr:
case X86::VPDPBUUDYrr:
case X86::VPDPWSSDZ128r:
case X86::VPDPWSSDZ128rk:
case X86::VPDPWSSDZ128rkz:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,7 @@ def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">;
def HasAVXVNNIINT8 : Predicate<"Subtarget->hasAVXVNNIINT8()">;
def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">;
def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;

Expand Down
59 changes: 59 additions & 0 deletions llvm/lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -8160,3 +8160,62 @@ multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {

defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix;
defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix;

// Defines a VEX_4V three-source instruction pair whose first source is tied
// to the destination ($src1 = $dst), so the assembly string only prints
// $dst, $src2 and $src3.
//
// Records produced per instantiation:
//   rr - all operands in registers (MRMSrcReg).
//   rm - $src3 read from memory through MemOpFrag (MRMSrcMem).
//
// Parameters:
//   Opc          - opcode byte.
//   OpcodeStr    - mnemonic placed in front of the operand string.
//   OpVT         - value type of the selected result.
//   RC           - register class of $dst/$src1/$src2 (and $src3 in rr).
//   MemOpFrag    - load fragment wrapped around addr:$src3 in the rm pattern.
//   X86memop     - memory operand type of $src3 in rm.
//   OpNode       - SDNode matched by both patterns.
//   Sched        - scheduling class; rm uses its Folded/ReadAfterFold members.
//   IsCommutable - value assigned to isCommutable on the rr record.
let Constraints = "$src1 = $dst" in
multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT,
RegisterClass RC, PatFrag MemOpFrag,
X86MemOperand X86memop, SDNode OpNode,
X86FoldableSchedWrite Sched,
bit IsCommutable> {
// The braceless 'let' binds to the single def that follows, i.e. only rr.
let isCommutable = IsCommutable in
def rr : I<Opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
// Operand order before '|' is the AT&T spelling, after it the Intel one.
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
VEX_4V, Sched<[Sched]>;
def rm : I<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, X86memop:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
(MemOpFrag addr:$src3))))]>,
VEX_4V, Sched<[Sched.Folded, Sched.ReadAfterFold]>;
}

// Instruction definitions gated on HasAVXVNNIINT8.
// Each mnemonic is instantiated twice: an XMM form (v4i32, VR128, i128mem)
// and a Y-suffixed YMM form (v8i32, VR256, i256mem) that adds VEX_L.
// Opcode 0x50 is used for the vpdpb??d mnemonics and 0x51 for vpdpb??ds.
// The prefix class separates the families: T8XD for bssd, T8PS for buud,
// T8XS for bsud.
// The bssd and buud families pass IsCommutable = 1; the bsud families pass 0.
// NOTE(review): presumably bsud is not commutable because its two multiplied
// sources are treated differently - confirm against the ISA reference.
let Predicates = [HasAVXVNNIINT8] in {
defm VPDPBSSD : avx_dotprod_rm<0x50,"vpdpbssd", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbssd, SchedWriteVecIMul.XMM,
1>, T8XD;
defm VPDPBSSDY : avx_dotprod_rm<0x50,"vpdpbssd", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbssd, SchedWriteVecIMul.YMM,
1>, VEX_L, T8XD;
defm VPDPBUUD : avx_dotprod_rm<0x50,"vpdpbuud", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbuud, SchedWriteVecIMul.XMM,
1>, T8PS;
defm VPDPBUUDY : avx_dotprod_rm<0x50,"vpdpbuud", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbuud, SchedWriteVecIMul.YMM,
1>, VEX_L, T8PS;
defm VPDPBSSDS : avx_dotprod_rm<0x51,"vpdpbssds", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbssds, SchedWriteVecIMul.XMM,
1>, T8XD;
defm VPDPBSSDSY : avx_dotprod_rm<0x51,"vpdpbssds", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbssds, SchedWriteVecIMul.YMM,
1>, VEX_L, T8XD;
defm VPDPBUUDS : avx_dotprod_rm<0x51,"vpdpbuuds", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbuuds, SchedWriteVecIMul.XMM,
1>, T8PS;
defm VPDPBUUDSY : avx_dotprod_rm<0x51,"vpdpbuuds", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbuuds, SchedWriteVecIMul.YMM,
1>, VEX_L, T8PS;
defm VPDPBSUD : avx_dotprod_rm<0x50,"vpdpbsud", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbsud, SchedWriteVecIMul.XMM,
0>, T8XS;
defm VPDPBSUDY : avx_dotprod_rm<0x50,"vpdpbsud", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbsud, SchedWriteVecIMul.YMM,
0>, VEX_L, T8XS;
defm VPDPBSUDS : avx_dotprod_rm<0x51,"vpdpbsuds", v4i32, VR128, loadv4i32,
i128mem, X86vpdpbsuds, SchedWriteVecIMul.XMM,
0>, T8XS;
defm VPDPBSUDSY : avx_dotprod_rm<0x51,"vpdpbsuds", v8i32, VR256, loadv8i32,
i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM,
0>, VEX_L, T8XS;
}
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx2_vpdpbssd_128, INTR_TYPE_3OP, X86ISD::VPDPBSSD, 0),
X86_INTRINSIC_DATA(avx2_vpdpbssd_256, INTR_TYPE_3OP, X86ISD::VPDPBSSD, 0),
X86_INTRINSIC_DATA(avx2_vpdpbssds_128, INTR_TYPE_3OP, X86ISD::VPDPBSSDS, 0),
X86_INTRINSIC_DATA(avx2_vpdpbssds_256, INTR_TYPE_3OP, X86ISD::VPDPBSSDS, 0),
X86_INTRINSIC_DATA(avx2_vpdpbsud_128, INTR_TYPE_3OP, X86ISD::VPDPBSUD, 0),
X86_INTRINSIC_DATA(avx2_vpdpbsud_256, INTR_TYPE_3OP, X86ISD::VPDPBSUD, 0),
X86_INTRINSIC_DATA(avx2_vpdpbsuds_128, INTR_TYPE_3OP, X86ISD::VPDPBSUDS, 0),
X86_INTRINSIC_DATA(avx2_vpdpbsuds_256, INTR_TYPE_3OP, X86ISD::VPDPBSUDS, 0),
X86_INTRINSIC_DATA(avx2_vpdpbuud_128, INTR_TYPE_3OP, X86ISD::VPDPBUUD, 0),
X86_INTRINSIC_DATA(avx2_vpdpbuud_256, INTR_TYPE_3OP, X86ISD::VPDPBUUD, 0),
X86_INTRINSIC_DATA(avx2_vpdpbuuds_128, INTR_TYPE_3OP, X86ISD::VPDPBUUDS, 0),
X86_INTRINSIC_DATA(avx2_vpdpbuuds_256, INTR_TYPE_3OP, X86ISD::VPDPBUUDS, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_conflict_d_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
Expand Down
316 changes: 316 additions & 0 deletions llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll

Large diffs are not rendered by default.

355 changes: 355 additions & 0 deletions llvm/test/CodeGen/X86/stack-folding-int-avxvnniint8.ll

Large diffs are not rendered by default.

243 changes: 243 additions & 0 deletions llvm/test/MC/Disassembler/X86/avx-vnni-int8-32.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
# RUN: llvm-mc --disassemble %s -triple=i686 | FileCheck %s --check-prefixes=ATT
# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL

# ATT: vpdpbssd %ymm4, %ymm3, %ymm2
# INTEL: vpdpbssd ymm2, ymm3, ymm4
0xc4,0xe2,0x67,0x50,0xd4

# ATT: vpdpbssd %xmm4, %xmm3, %xmm2
# INTEL: vpdpbssd xmm2, xmm3, xmm4
0xc4,0xe2,0x63,0x50,0xd4

# ATT: vpdpbssd 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: vpdpbssd ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x67,0x50,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbssd 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: vpdpbssd ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x67,0x50,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbssd (%eax), %ymm3, %ymm2
# INTEL: vpdpbssd ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0x67,0x50,0x10

# ATT: vpdpbssd -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: vpdpbssd ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0x67,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbssd 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: vpdpbssd xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x63,0x50,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbssd 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: vpdpbssd xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x63,0x50,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbssd (%eax), %xmm3, %xmm2
# INTEL: vpdpbssd xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0x63,0x50,0x10

# ATT: vpdpbssd -512(,%ebp,2), %xmm3, %xmm2
# INTEL: vpdpbssd xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0x63,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbssds %ymm4, %ymm3, %ymm2
# INTEL: vpdpbssds ymm2, ymm3, ymm4
0xc4,0xe2,0x67,0x51,0xd4

# ATT: vpdpbssds %xmm4, %xmm3, %xmm2
# INTEL: vpdpbssds xmm2, xmm3, xmm4
0xc4,0xe2,0x63,0x51,0xd4

# ATT: vpdpbssds 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: vpdpbssds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x67,0x51,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbssds 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: vpdpbssds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x67,0x51,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbssds (%eax), %ymm3, %ymm2
# INTEL: vpdpbssds ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0x67,0x51,0x10

# ATT: vpdpbssds -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: vpdpbssds ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0x67,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbssds 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: vpdpbssds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x63,0x51,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbssds 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: vpdpbssds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x63,0x51,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbssds (%eax), %xmm3, %xmm2
# INTEL: vpdpbssds xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0x63,0x51,0x10

# ATT: vpdpbssds -512(,%ebp,2), %xmm3, %xmm2
# INTEL: vpdpbssds xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0x63,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbsud %ymm4, %ymm3, %ymm2
# INTEL: vpdpbsud ymm2, ymm3, ymm4
0xc4,0xe2,0x66,0x50,0xd4

# ATT: vpdpbsud %xmm4, %xmm3, %xmm2
# INTEL: vpdpbsud xmm2, xmm3, xmm4
0xc4,0xe2,0x62,0x50,0xd4

# ATT: vpdpbsud 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: vpdpbsud ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x66,0x50,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbsud 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: vpdpbsud ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x66,0x50,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbsud (%eax), %ymm3, %ymm2
# INTEL: vpdpbsud ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0x66,0x50,0x10

# ATT: vpdpbsud -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: vpdpbsud ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0x66,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbsud 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: vpdpbsud xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x62,0x50,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbsud 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: vpdpbsud xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x62,0x50,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbsud (%eax), %xmm3, %xmm2
# INTEL: vpdpbsud xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0x62,0x50,0x10

# ATT: vpdpbsud -512(,%ebp,2), %xmm3, %xmm2
# INTEL: vpdpbsud xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0x62,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbsuds %ymm4, %ymm3, %ymm2
# INTEL: vpdpbsuds ymm2, ymm3, ymm4
0xc4,0xe2,0x66,0x51,0xd4

# ATT: vpdpbsuds %xmm4, %xmm3, %xmm2
# INTEL: vpdpbsuds xmm2, xmm3, xmm4
0xc4,0xe2,0x62,0x51,0xd4

# ATT: vpdpbsuds 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: vpdpbsuds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x66,0x51,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbsuds 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: vpdpbsuds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x66,0x51,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbsuds (%eax), %ymm3, %ymm2
# INTEL: vpdpbsuds ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0x66,0x51,0x10

# ATT: vpdpbsuds -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: vpdpbsuds ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0x66,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbsuds 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: vpdpbsuds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x62,0x51,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbsuds 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: vpdpbsuds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x62,0x51,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbsuds (%eax), %xmm3, %xmm2
# INTEL: vpdpbsuds xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0x62,0x51,0x10

# ATT: vpdpbsuds -512(,%ebp,2), %xmm3, %xmm2
# INTEL: vpdpbsuds xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0x62,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbuud %ymm4, %ymm3, %ymm2
# INTEL: vpdpbuud ymm2, ymm3, ymm4
0xc4,0xe2,0x64,0x50,0xd4

# ATT: vpdpbuud %xmm4, %xmm3, %xmm2
# INTEL: vpdpbuud xmm2, xmm3, xmm4
0xc4,0xe2,0x60,0x50,0xd4

# ATT: vpdpbuud 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: vpdpbuud ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x64,0x50,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbuud 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: vpdpbuud ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x64,0x50,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbuud (%eax), %ymm3, %ymm2
# INTEL: vpdpbuud ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0x64,0x50,0x10

# ATT: vpdpbuud -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: vpdpbuud ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0x64,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbuud 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: vpdpbuud xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x60,0x50,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbuud 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: vpdpbuud xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x60,0x50,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbuud (%eax), %xmm3, %xmm2
# INTEL: vpdpbuud xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0x60,0x50,0x10

# ATT: vpdpbuud -512(,%ebp,2), %xmm3, %xmm2
# INTEL: vpdpbuud xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0x60,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbuuds %ymm4, %ymm3, %ymm2
# INTEL: vpdpbuuds ymm2, ymm3, ymm4
0xc4,0xe2,0x64,0x51,0xd4

# ATT: vpdpbuuds %xmm4, %xmm3, %xmm2
# INTEL: vpdpbuuds xmm2, xmm3, xmm4
0xc4,0xe2,0x60,0x51,0xd4

# ATT: vpdpbuuds 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: vpdpbuuds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x64,0x51,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbuuds 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: vpdpbuuds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x64,0x51,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbuuds (%eax), %ymm3, %ymm2
# INTEL: vpdpbuuds ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0x64,0x51,0x10

# ATT: vpdpbuuds -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: vpdpbuuds ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0x64,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbuuds 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: vpdpbuuds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0x60,0x51,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: vpdpbuuds 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: vpdpbuuds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0x60,0x51,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: vpdpbuuds (%eax), %xmm3, %xmm2
# INTEL: vpdpbuuds xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0x60,0x51,0x10

# ATT: vpdpbuuds -512(,%ebp,2), %xmm3, %xmm2
# INTEL: vpdpbuuds xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0x60,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff

243 changes: 243 additions & 0 deletions llvm/test/MC/Disassembler/X86/avx-vnni-int8-64.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL

# ATT: vpdpbssd %ymm14, %ymm13, %ymm12
# INTEL: vpdpbssd ymm12, ymm13, ymm14
0xc4,0x42,0x17,0x50,0xe6

# ATT: vpdpbssd %xmm14, %xmm13, %xmm12
# INTEL: vpdpbssd xmm12, xmm13, xmm14
0xc4,0x42,0x13,0x50,0xe6

# ATT: vpdpbssd 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: vpdpbssd ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x17,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbssd 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: vpdpbssd ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x17,0x50,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbssd (%rip), %ymm13, %ymm12
# INTEL: vpdpbssd ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x17,0x50,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbssd -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: vpdpbssd ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x17,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbssd 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: vpdpbssd xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x13,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbssd 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: vpdpbssd xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x13,0x50,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbssd (%rip), %xmm13, %xmm12
# INTEL: vpdpbssd xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x13,0x50,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbssd -512(,%rbp,2), %xmm13, %xmm12
# INTEL: vpdpbssd xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x13,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbssds %ymm14, %ymm13, %ymm12
# INTEL: vpdpbssds ymm12, ymm13, ymm14
0xc4,0x42,0x17,0x51,0xe6

# ATT: vpdpbssds %xmm14, %xmm13, %xmm12
# INTEL: vpdpbssds xmm12, xmm13, xmm14
0xc4,0x42,0x13,0x51,0xe6

# ATT: vpdpbssds 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: vpdpbssds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x17,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbssds 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: vpdpbssds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x17,0x51,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbssds (%rip), %ymm13, %ymm12
# INTEL: vpdpbssds ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x17,0x51,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbssds -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: vpdpbssds ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x17,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbssds 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: vpdpbssds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x13,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbssds 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: vpdpbssds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x13,0x51,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbssds (%rip), %xmm13, %xmm12
# INTEL: vpdpbssds xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x13,0x51,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbssds -512(,%rbp,2), %xmm13, %xmm12
# INTEL: vpdpbssds xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x13,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbsud %ymm14, %ymm13, %ymm12
# INTEL: vpdpbsud ymm12, ymm13, ymm14
0xc4,0x42,0x16,0x50,0xe6

# ATT: vpdpbsud %xmm14, %xmm13, %xmm12
# INTEL: vpdpbsud xmm12, xmm13, xmm14
0xc4,0x42,0x12,0x50,0xe6

# ATT: vpdpbsud 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: vpdpbsud ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x16,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbsud 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: vpdpbsud ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x16,0x50,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbsud (%rip), %ymm13, %ymm12
# INTEL: vpdpbsud ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x16,0x50,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbsud -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: vpdpbsud ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x16,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbsud 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: vpdpbsud xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x12,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbsud 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: vpdpbsud xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x12,0x50,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbsud (%rip), %xmm13, %xmm12
# INTEL: vpdpbsud xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x12,0x50,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbsud -512(,%rbp,2), %xmm13, %xmm12
# INTEL: vpdpbsud xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x12,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbsuds %ymm14, %ymm13, %ymm12
# INTEL: vpdpbsuds ymm12, ymm13, ymm14
0xc4,0x42,0x16,0x51,0xe6

# ATT: vpdpbsuds %xmm14, %xmm13, %xmm12
# INTEL: vpdpbsuds xmm12, xmm13, xmm14
0xc4,0x42,0x12,0x51,0xe6

# ATT: vpdpbsuds 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: vpdpbsuds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x16,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbsuds 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: vpdpbsuds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x16,0x51,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbsuds (%rip), %ymm13, %ymm12
# INTEL: vpdpbsuds ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x16,0x51,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbsuds -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: vpdpbsuds ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x16,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbsuds 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: vpdpbsuds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x12,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbsuds 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: vpdpbsuds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x12,0x51,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbsuds (%rip), %xmm13, %xmm12
# INTEL: vpdpbsuds xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x12,0x51,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbsuds -512(,%rbp,2), %xmm13, %xmm12
# INTEL: vpdpbsuds xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x12,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbuud %ymm14, %ymm13, %ymm12
# INTEL: vpdpbuud ymm12, ymm13, ymm14
0xc4,0x42,0x14,0x50,0xe6

# ATT: vpdpbuud %xmm14, %xmm13, %xmm12
# INTEL: vpdpbuud xmm12, xmm13, xmm14
0xc4,0x42,0x10,0x50,0xe6

# ATT: vpdpbuud 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: vpdpbuud ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x14,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbuud 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: vpdpbuud ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x14,0x50,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbuud (%rip), %ymm13, %ymm12
# INTEL: vpdpbuud ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x14,0x50,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbuud -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: vpdpbuud ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x14,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbuud 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: vpdpbuud xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x10,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbuud 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: vpdpbuud xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x10,0x50,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbuud (%rip), %xmm13, %xmm12
# INTEL: vpdpbuud xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x10,0x50,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbuud -512(,%rbp,2), %xmm13, %xmm12
# INTEL: vpdpbuud xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x10,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: vpdpbuuds %ymm14, %ymm13, %ymm12
# INTEL: vpdpbuuds ymm12, ymm13, ymm14
0xc4,0x42,0x14,0x51,0xe6

# ATT: vpdpbuuds %xmm14, %xmm13, %xmm12
# INTEL: vpdpbuuds xmm12, xmm13, xmm14
0xc4,0x42,0x10,0x51,0xe6

# ATT: vpdpbuuds 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: vpdpbuuds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x14,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbuuds 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: vpdpbuuds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x14,0x51,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbuuds (%rip), %ymm13, %ymm12
# INTEL: vpdpbuuds ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x14,0x51,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbuuds -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: vpdpbuuds ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x14,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: vpdpbuuds 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: vpdpbuuds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x10,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: vpdpbuuds 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: vpdpbuuds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x10,0x51,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: vpdpbuuds (%rip), %xmm13, %xmm12
# INTEL: vpdpbuuds xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x10,0x51,0x25,0x00,0x00,0x00,0x00

# ATT: vpdpbuuds -512(,%rbp,2), %xmm13, %xmm12
# INTEL: vpdpbuuds xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x10,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff

241 changes: 241 additions & 0 deletions llvm/test/MC/X86/avx_vnni_int8-32-att.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxvnniint8 --show-encoding %s | FileCheck %s

// CHECK: vpdpbssd %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0xd4]
vpdpbssd %ymm4, %ymm3, %ymm2

// CHECK: vpdpbssd %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0xd4]
vpdpbssd %xmm4, %xmm3, %xmm2

// CHECK: vpdpbssd 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssd 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: vpdpbssd 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssd 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: vpdpbssd (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x10]
vpdpbssd (%eax), %ymm3, %ymm2

// CHECK: vpdpbssd -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssd -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: vpdpbssd 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssd 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: vpdpbssd 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssd 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: vpdpbssd (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x10]
vpdpbssd (%eax), %xmm3, %xmm2

// CHECK: vpdpbssd -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssd -512(,%ebp,2), %xmm3, %xmm2

// CHECK: vpdpbssds %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0xd4]
vpdpbssds %ymm4, %ymm3, %ymm2

// CHECK: vpdpbssds %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0xd4]
vpdpbssds %xmm4, %xmm3, %xmm2

// CHECK: vpdpbssds 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssds 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: vpdpbssds 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssds 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: vpdpbssds (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x10]
vpdpbssds (%eax), %ymm3, %ymm2

// CHECK: vpdpbssds -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssds -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: vpdpbssds 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssds 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: vpdpbssds 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssds 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: vpdpbssds (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x10]
vpdpbssds (%eax), %xmm3, %xmm2

// CHECK: vpdpbssds -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssds -512(,%ebp,2), %xmm3, %xmm2

// CHECK: vpdpbsud %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0xd4]
vpdpbsud %ymm4, %ymm3, %ymm2

// CHECK: vpdpbsud %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0xd4]
vpdpbsud %xmm4, %xmm3, %xmm2

// CHECK: vpdpbsud 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsud 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: vpdpbsud 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsud 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: vpdpbsud (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x10]
vpdpbsud (%eax), %ymm3, %ymm2

// CHECK: vpdpbsud -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsud -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: vpdpbsud 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsud 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: vpdpbsud 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsud 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: vpdpbsud (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x10]
vpdpbsud (%eax), %xmm3, %xmm2

// CHECK: vpdpbsud -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsud -512(,%ebp,2), %xmm3, %xmm2

// CHECK: vpdpbsuds %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0xd4]
vpdpbsuds %ymm4, %ymm3, %ymm2

// CHECK: vpdpbsuds %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0xd4]
vpdpbsuds %xmm4, %xmm3, %xmm2

// CHECK: vpdpbsuds 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsuds 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: vpdpbsuds 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsuds 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: vpdpbsuds (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x10]
vpdpbsuds (%eax), %ymm3, %ymm2

// CHECK: vpdpbsuds -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsuds -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: vpdpbsuds 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsuds 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: vpdpbsuds 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsuds 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: vpdpbsuds (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x10]
vpdpbsuds (%eax), %xmm3, %xmm2

// CHECK: vpdpbsuds -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsuds -512(,%ebp,2), %xmm3, %xmm2

// CHECK: vpdpbuud %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0xd4]
vpdpbuud %ymm4, %ymm3, %ymm2

// CHECK: vpdpbuud %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0xd4]
vpdpbuud %xmm4, %xmm3, %xmm2

// CHECK: vpdpbuud 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuud 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: vpdpbuud 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuud 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: vpdpbuud (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x10]
vpdpbuud (%eax), %ymm3, %ymm2

// CHECK: vpdpbuud -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuud -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: vpdpbuud 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuud 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: vpdpbuud 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuud 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: vpdpbuud (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x10]
vpdpbuud (%eax), %xmm3, %xmm2

// CHECK: vpdpbuud -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuud -512(,%ebp,2), %xmm3, %xmm2

// CHECK: vpdpbuuds %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0xd4]
vpdpbuuds %ymm4, %ymm3, %ymm2

// CHECK: vpdpbuuds %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0xd4]
vpdpbuuds %xmm4, %xmm3, %xmm2

// CHECK: vpdpbuuds 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuuds 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: vpdpbuuds 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuuds 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: vpdpbuuds (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x10]
vpdpbuuds (%eax), %ymm3, %ymm2

// CHECK: vpdpbuuds -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuuds -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: vpdpbuuds 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuuds 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: vpdpbuuds 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuuds 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: vpdpbuuds (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x10]
vpdpbuuds (%eax), %xmm3, %xmm2

// CHECK: vpdpbuuds -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuuds -512(,%ebp,2), %xmm3, %xmm2
242 changes: 242 additions & 0 deletions llvm/test/MC/X86/avx_vnni_int8-32-intel.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxvnniint8 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s

// CHECK: vpdpbssd ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0xd4]
vpdpbssd ymm2, ymm3, ymm4

// CHECK: vpdpbssd xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0xd4]
vpdpbssd xmm2, xmm3, xmm4

// CHECK: vpdpbssd ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssd ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbssd ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssd ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbssd ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x10]
vpdpbssd ymm2, ymm3, ymmword ptr [eax]

// CHECK: vpdpbssd ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0x67,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssd ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: vpdpbssd xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssd xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbssd xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssd xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbssd xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x10]
vpdpbssd xmm2, xmm3, xmmword ptr [eax]

// CHECK: vpdpbssd xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0x63,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssd xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: vpdpbssds ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0xd4]
vpdpbssds ymm2, ymm3, ymm4

// CHECK: vpdpbssds xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0xd4]
vpdpbssds xmm2, xmm3, xmm4

// CHECK: vpdpbssds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbssds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbssds ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x10]
vpdpbssds ymm2, ymm3, ymmword ptr [eax]

// CHECK: vpdpbssds ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0x67,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssds ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: vpdpbssds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbssds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbssds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbssds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbssds xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x10]
vpdpbssds xmm2, xmm3, xmmword ptr [eax]

// CHECK: vpdpbssds xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0x63,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssds xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: vpdpbsud ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0xd4]
vpdpbsud ymm2, ymm3, ymm4

// CHECK: vpdpbsud xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0xd4]
vpdpbsud xmm2, xmm3, xmm4

// CHECK: vpdpbsud ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsud ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbsud ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsud ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbsud ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x10]
vpdpbsud ymm2, ymm3, ymmword ptr [eax]

// CHECK: vpdpbsud ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0x66,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsud ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: vpdpbsud xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsud xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbsud xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsud xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbsud xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x10]
vpdpbsud xmm2, xmm3, xmmword ptr [eax]

// CHECK: vpdpbsud xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0x62,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsud xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: vpdpbsuds ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0xd4]
vpdpbsuds ymm2, ymm3, ymm4

// CHECK: vpdpbsuds xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0xd4]
vpdpbsuds xmm2, xmm3, xmm4

// CHECK: vpdpbsuds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsuds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbsuds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsuds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbsuds ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x10]
vpdpbsuds ymm2, ymm3, ymmword ptr [eax]

// CHECK: vpdpbsuds ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0x66,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsuds ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: vpdpbsuds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbsuds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbsuds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbsuds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbsuds xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x10]
vpdpbsuds xmm2, xmm3, xmmword ptr [eax]

// CHECK: vpdpbsuds xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0x62,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsuds xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: vpdpbuud ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0xd4]
vpdpbuud ymm2, ymm3, ymm4

// CHECK: vpdpbuud xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0xd4]
vpdpbuud xmm2, xmm3, xmm4

// CHECK: vpdpbuud ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuud ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbuud ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuud ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbuud ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x10]
vpdpbuud ymm2, ymm3, ymmword ptr [eax]

// CHECK: vpdpbuud ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0x64,0x50,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuud ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: vpdpbuud xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuud xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbuud xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuud xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbuud xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x10]
vpdpbuud xmm2, xmm3, xmmword ptr [eax]

// CHECK: vpdpbuud xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0x60,0x50,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuud xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: vpdpbuuds ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0xd4]
vpdpbuuds ymm2, ymm3, ymm4

// CHECK: vpdpbuuds xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0xd4]
vpdpbuuds xmm2, xmm3, xmm4

// CHECK: vpdpbuuds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuuds ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbuuds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuuds ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbuuds ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x10]
vpdpbuuds ymm2, ymm3, ymmword ptr [eax]

// CHECK: vpdpbuuds ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0x64,0x51,0x14,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuuds ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: vpdpbuuds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x94,0xf4,0x00,0x00,0x00,0x10]
vpdpbuuds xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: vpdpbuuds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x94,0x87,0x23,0x01,0x00,0x00]
vpdpbuuds xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: vpdpbuuds xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x10]
vpdpbuuds xmm2, xmm3, xmmword ptr [eax]

// CHECK: vpdpbuuds xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0x60,0x51,0x14,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuuds xmm2, xmm3, xmmword ptr [2*ebp - 512]

242 changes: 242 additions & 0 deletions llvm/test/MC/X86/avx_vnni_int8-64-att.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// RUN: llvm-mc -triple=x86_64-unknown-unknown -mattr=+avxvnniint8 --show-encoding < %s | FileCheck %s

// CHECK: vpdpbssd %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x17,0x50,0xe6]
vpdpbssd %ymm14, %ymm13, %ymm12

// CHECK: vpdpbssd %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x13,0x50,0xe6]
vpdpbssd %xmm14, %xmm13, %xmm12

// CHECK: vpdpbssd 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x17,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssd 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: vpdpbssd 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x17,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssd 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: vpdpbssd (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x17,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbssd (%rip), %ymm13, %ymm12

// CHECK: vpdpbssd -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x17,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssd -1024(,%rbp,2), %ymm13, %ymm12

// CHECK: vpdpbssd 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x13,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssd 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: vpdpbssd 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x13,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssd 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: vpdpbssd (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x13,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbssd (%rip), %xmm13, %xmm12

// CHECK: vpdpbssd -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x13,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssd -512(,%rbp,2), %xmm13, %xmm12

// CHECK: vpdpbssds %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x17,0x51,0xe6]
vpdpbssds %ymm14, %ymm13, %ymm12

// CHECK: vpdpbssds %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x13,0x51,0xe6]
vpdpbssds %xmm14, %xmm13, %xmm12

// CHECK: vpdpbssds 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x17,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssds 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: vpdpbssds 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x17,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssds 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: vpdpbssds (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x17,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbssds (%rip), %ymm13, %ymm12

// CHECK: vpdpbssds -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x17,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssds -1024(,%rbp,2), %ymm13, %ymm12

// CHECK: vpdpbssds 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x13,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssds 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: vpdpbssds 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x13,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssds 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: vpdpbssds (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x13,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbssds (%rip), %xmm13, %xmm12

// CHECK: vpdpbssds -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x13,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssds -512(,%rbp,2), %xmm13, %xmm12

// vpdpbsud, AT&T syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x50 as the
// fourth byte; the third byte is 0x16 for the ymm forms and 0x12 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbsud %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x16,0x50,0xe6]
vpdpbsud %ymm14, %ymm13, %ymm12

// CHECK: vpdpbsud %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x12,0x50,0xe6]
vpdpbsud %xmm14, %xmm13, %xmm12

// ymm forms with a memory operand: base+index*8+disp, base+index*4+disp,
// RIP-relative, and index*2 with no base register and disp -1024.
// CHECK: vpdpbsud 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x16,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsud 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: vpdpbsud 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x16,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsud 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: vpdpbsud (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x16,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbsud (%rip), %ymm13, %ymm12

// CHECK: vpdpbsud -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x16,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsud -1024(,%rbp,2), %ymm13, %ymm12

// xmm forms with the same four memory operand shapes (last one uses disp -512).
// CHECK: vpdpbsud 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x12,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsud 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: vpdpbsud 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x12,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsud 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: vpdpbsud (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x12,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbsud (%rip), %xmm13, %xmm12

// CHECK: vpdpbsud -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x12,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsud -512(,%rbp,2), %xmm13, %xmm12

// vpdpbsuds, AT&T syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x51 as the
// fourth byte; the third byte is 0x16 for the ymm forms and 0x12 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbsuds %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x16,0x51,0xe6]
vpdpbsuds %ymm14, %ymm13, %ymm12

// CHECK: vpdpbsuds %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x12,0x51,0xe6]
vpdpbsuds %xmm14, %xmm13, %xmm12

// ymm forms with a memory operand: base+index*8+disp, base+index*4+disp,
// RIP-relative, and index*2 with no base register and disp -1024.
// CHECK: vpdpbsuds 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x16,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsuds 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: vpdpbsuds 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x16,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsuds 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: vpdpbsuds (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x16,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbsuds (%rip), %ymm13, %ymm12

// CHECK: vpdpbsuds -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x16,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsuds -1024(,%rbp,2), %ymm13, %ymm12

// xmm forms with the same four memory operand shapes (last one uses disp -512).
// CHECK: vpdpbsuds 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x12,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsuds 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: vpdpbsuds 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x12,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsuds 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: vpdpbsuds (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x12,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbsuds (%rip), %xmm13, %xmm12

// CHECK: vpdpbsuds -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x12,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsuds -512(,%rbp,2), %xmm13, %xmm12

// vpdpbuud, AT&T syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x50 as the
// fourth byte; the third byte is 0x14 for the ymm forms and 0x10 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbuud %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x14,0x50,0xe6]
vpdpbuud %ymm14, %ymm13, %ymm12

// CHECK: vpdpbuud %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x10,0x50,0xe6]
vpdpbuud %xmm14, %xmm13, %xmm12

// ymm forms with a memory operand: base+index*8+disp, base+index*4+disp,
// RIP-relative, and index*2 with no base register and disp -1024.
// CHECK: vpdpbuud 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x14,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuud 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: vpdpbuud 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x14,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuud 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: vpdpbuud (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x14,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbuud (%rip), %ymm13, %ymm12

// CHECK: vpdpbuud -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x14,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuud -1024(,%rbp,2), %ymm13, %ymm12

// xmm forms with the same four memory operand shapes (last one uses disp -512).
// CHECK: vpdpbuud 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x10,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuud 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: vpdpbuud 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x10,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuud 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: vpdpbuud (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x10,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbuud (%rip), %xmm13, %xmm12

// CHECK: vpdpbuud -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x10,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuud -512(,%rbp,2), %xmm13, %xmm12

// vpdpbuuds, AT&T syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x51 as the
// fourth byte; the third byte is 0x14 for the ymm forms and 0x10 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbuuds %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x14,0x51,0xe6]
vpdpbuuds %ymm14, %ymm13, %ymm12

// CHECK: vpdpbuuds %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x10,0x51,0xe6]
vpdpbuuds %xmm14, %xmm13, %xmm12

// ymm forms with a memory operand: base+index*8+disp, base+index*4+disp,
// RIP-relative, and index*2 with no base register and disp -1024.
// CHECK: vpdpbuuds 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x14,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuuds 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: vpdpbuuds 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x14,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuuds 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: vpdpbuuds (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x14,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbuuds (%rip), %ymm13, %ymm12

// CHECK: vpdpbuuds -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x14,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuuds -1024(,%rbp,2), %ymm13, %ymm12

// xmm forms with the same four memory operand shapes (last one uses disp -512).
// CHECK: vpdpbuuds 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x10,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuuds 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: vpdpbuuds 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x10,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuuds 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: vpdpbuuds (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x10,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbuuds (%rip), %xmm13, %xmm12

// CHECK: vpdpbuuds -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x10,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuuds -512(,%rbp,2), %xmm13, %xmm12

242 changes: 242 additions & 0 deletions llvm/test/MC/X86/avx_vnni_int8-64-intel.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxvnniint8 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
// The command above assembles this file for x86_64 with the avxvnniint8
// feature enabled, reading and printing Intel syntax, and pipes the printed
// instructions plus their encoding bytes to FileCheck.

// vpdpbssd, Intel syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x50 as the
// fourth byte; the third byte is 0x17 for the ymm forms and 0x13 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbssd ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x17,0x50,0xe6]
vpdpbssd ymm12, ymm13, ymm14

// CHECK: vpdpbssd xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x13,0x50,0xe6]
vpdpbssd xmm12, xmm13, xmm14

// ymmword memory operands: base+8*index+disp, base+4*index+disp,
// RIP-relative, and 2*index with no base register and disp -1024.
// CHECK: vpdpbssd ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x17,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssd ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbssd ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x17,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssd ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbssd ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x17,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbssd ymm12, ymm13, ymmword ptr [rip]

// CHECK: vpdpbssd ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x17,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssd ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// xmmword memory operands with the same four shapes (last one uses disp -512).
// CHECK: vpdpbssd xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x13,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssd xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbssd xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x13,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssd xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbssd xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x13,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbssd xmm12, xmm13, xmmword ptr [rip]

// CHECK: vpdpbssd xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x13,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssd xmm12, xmm13, xmmword ptr [2*rbp - 512]

// vpdpbssds, Intel syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x51 as the
// fourth byte; the third byte is 0x17 for the ymm forms and 0x13 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbssds ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x17,0x51,0xe6]
vpdpbssds ymm12, ymm13, ymm14

// CHECK: vpdpbssds xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x13,0x51,0xe6]
vpdpbssds xmm12, xmm13, xmm14

// ymmword memory operands: base+8*index+disp, base+4*index+disp,
// RIP-relative, and 2*index with no base register and disp -1024.
// CHECK: vpdpbssds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x17,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbssds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x17,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbssds ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x17,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbssds ymm12, ymm13, ymmword ptr [rip]

// CHECK: vpdpbssds ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x17,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbssds ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// xmmword memory operands with the same four shapes (last one uses disp -512).
// CHECK: vpdpbssds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x13,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbssds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbssds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x13,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbssds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbssds xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x13,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbssds xmm12, xmm13, xmmword ptr [rip]

// CHECK: vpdpbssds xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x13,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbssds xmm12, xmm13, xmmword ptr [2*rbp - 512]

// vpdpbsud, Intel syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x50 as the
// fourth byte; the third byte is 0x16 for the ymm forms and 0x12 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbsud ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x16,0x50,0xe6]
vpdpbsud ymm12, ymm13, ymm14

// CHECK: vpdpbsud xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x12,0x50,0xe6]
vpdpbsud xmm12, xmm13, xmm14

// ymmword memory operands: base+8*index+disp, base+4*index+disp,
// RIP-relative, and 2*index with no base register and disp -1024.
// CHECK: vpdpbsud ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x16,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsud ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbsud ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x16,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsud ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbsud ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x16,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbsud ymm12, ymm13, ymmword ptr [rip]

// CHECK: vpdpbsud ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x16,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsud ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// xmmword memory operands with the same four shapes (last one uses disp -512).
// CHECK: vpdpbsud xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x12,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsud xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbsud xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x12,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsud xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbsud xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x12,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbsud xmm12, xmm13, xmmword ptr [rip]

// CHECK: vpdpbsud xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x12,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsud xmm12, xmm13, xmmword ptr [2*rbp - 512]

// vpdpbsuds, Intel syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x51 as the
// fourth byte; the third byte is 0x16 for the ymm forms and 0x12 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbsuds ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x16,0x51,0xe6]
vpdpbsuds ymm12, ymm13, ymm14

// CHECK: vpdpbsuds xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x12,0x51,0xe6]
vpdpbsuds xmm12, xmm13, xmm14

// ymmword memory operands: base+8*index+disp, base+4*index+disp,
// RIP-relative, and 2*index with no base register and disp -1024.
// CHECK: vpdpbsuds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x16,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsuds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbsuds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x16,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsuds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbsuds ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x16,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbsuds ymm12, ymm13, ymmword ptr [rip]

// CHECK: vpdpbsuds ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x16,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbsuds ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// xmmword memory operands with the same four shapes (last one uses disp -512).
// CHECK: vpdpbsuds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x12,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbsuds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbsuds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x12,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbsuds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbsuds xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x12,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbsuds xmm12, xmm13, xmmword ptr [rip]

// CHECK: vpdpbsuds xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x12,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbsuds xmm12, xmm13, xmmword ptr [2*rbp - 512]

// vpdpbuud, Intel syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x50 as the
// fourth byte; the third byte is 0x14 for the ymm forms and 0x10 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbuud ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x14,0x50,0xe6]
vpdpbuud ymm12, ymm13, ymm14

// CHECK: vpdpbuud xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x10,0x50,0xe6]
vpdpbuud xmm12, xmm13, xmm14

// ymmword memory operands: base+8*index+disp, base+4*index+disp,
// RIP-relative, and 2*index with no base register and disp -1024.
// CHECK: vpdpbuud ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x14,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuud ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbuud ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x14,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuud ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbuud ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x14,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbuud ymm12, ymm13, ymmword ptr [rip]

// CHECK: vpdpbuud ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x14,0x50,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuud ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// xmmword memory operands with the same four shapes (last one uses disp -512).
// CHECK: vpdpbuud xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x10,0x50,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuud xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbuud xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x10,0x50,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuud xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbuud xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x10,0x50,0x25,0x00,0x00,0x00,0x00]
vpdpbuud xmm12, xmm13, xmmword ptr [rip]

// CHECK: vpdpbuud xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x10,0x50,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuud xmm12, xmm13, xmmword ptr [2*rbp - 512]

// vpdpbuuds, Intel syntax. Every test is a triple: the expected printed
// instruction, the expected encoding bytes, then the input to assemble.
// All expected encodings in this group start with 0xc4 and carry 0x51 as the
// fourth byte; the third byte is 0x14 for the ymm forms and 0x10 for xmm.

// Register-only forms (ymm, then xmm).
// CHECK: vpdpbuuds ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x14,0x51,0xe6]
vpdpbuuds ymm12, ymm13, ymm14

// CHECK: vpdpbuuds xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x10,0x51,0xe6]
vpdpbuuds xmm12, xmm13, xmm14

// ymmword memory operands: base+8*index+disp, base+4*index+disp,
// RIP-relative, and 2*index with no base register and disp -1024.
// CHECK: vpdpbuuds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x14,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuuds ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbuuds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x14,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuuds ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbuuds ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x14,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbuuds ymm12, ymm13, ymmword ptr [rip]

// CHECK: vpdpbuuds ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x14,0x51,0x24,0x6d,0x00,0xfc,0xff,0xff]
vpdpbuuds ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// xmmword memory operands with the same four shapes (last one uses disp -512).
// CHECK: vpdpbuuds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x10,0x51,0xa4,0xf5,0x00,0x00,0x00,0x10]
vpdpbuuds xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: vpdpbuuds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x10,0x51,0xa4,0x80,0x23,0x01,0x00,0x00]
vpdpbuuds xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: vpdpbuuds xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x10,0x51,0x25,0x00,0x00,0x00,0x00]
vpdpbuuds xmm12, xmm13, xmmword ptr [rip]

// CHECK: vpdpbuuds xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x10,0x51,0x24,0x6d,0x00,0xfe,0xff,0xff]
vpdpbuuds xmm12, xmm13, xmmword ptr [2*rbp - 512]