52 changes: 52 additions & 0 deletions clang/test/CodeGen/avxifma-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>

__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: @test_mm_madd52hi_epu64
// CHECK: call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128
return _mm_madd52hi_epu64(__X, __Y, __Z);
}

__m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: @test_mm256_madd52hi_epu64
// CHECK: call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256
return _mm256_madd52hi_epu64(__X, __Y, __Z);
}

__m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: @test_mm_madd52lo_epu64
// CHECK: call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128
return _mm_madd52lo_epu64(__X, __Y, __Z);
}

__m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: @test_mm256_madd52lo_epu64
// CHECK: call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256
return _mm256_madd52lo_epu64(__X, __Y, __Z);
}

__m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: @test_mm_madd52hi_avx_epu64
// CHECK: call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128
return _mm_madd52hi_avx_epu64(__X, __Y, __Z);
}

__m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: @test_mm256_madd52hi_avx_epu64
// CHECK: call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256
return _mm256_madd52hi_avx_epu64(__X, __Y, __Z);
}

__m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: @test_mm_madd52lo_avx_epu64
// CHECK: call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128
return _mm_madd52lo_avx_epu64(__X, __Y, __Z);
}

__m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: @test_mm256_madd52lo_avx_epu64
// CHECK: call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256
return _mm256_madd52lo_avx_epu64(__X, __Y, __Z);
}
5 changes: 5 additions & 0 deletions clang/test/Driver/x86-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,11 @@
// RAOINT: "-target-feature" "+raoint"
// NO-RAOINT: "-target-feature" "-raoint"

// RUN: %clang -target i386-linux-gnu -mavxifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVXIFMA %s
// RUN: %clang -target i386-linux-gnu -mno-avxifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVXIFMA %s
// AVXIFMA: "-target-feature" "+avxifma"
// NO-AVXIFMA: "-target-feature" "-avxifma"

// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Preprocessor/x86_target_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,15 @@
// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mno-cmpccxadd -x c -E -dM -o - %s | FileCheck -check-prefix=NO-CMPCCXADD %s

// NO-CMPCCXADD-NOT: #define __CMPCCXADD__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavxifma -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXIFMA %s

// AVXIFMA: #define __AVX2__ 1
// AVXIFMA: #define __AVXIFMA__ 1

// RUN: %clang -target i386-unknown-unknown -march=atom -mavxifma -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXIFMANOAVX2 %s

// AVXIFMANOAVX2-NOT: #define __AVX2__ 1
// AVXIFMANOAVX2-NOT: #define __AVXIFMA__ 1

// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mraoint -x c -E -dM -o - %s | FileCheck -check-prefix=RAOINT %s

Expand Down
1 change: 1 addition & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Changes to the Windows Target

Changes to the X86 Backend
--------------------------
* Add support for the ``AVX-IFMA`` instructions.

* Add support for the ``RDMSRLIST`` and ``WRMSRLIST`` instructions.
* Add support for the ``WRMSRNS`` instruction.
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/X86TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ X86_FEATURE (AVX512FP16, "avx512fp16")
X86_FEATURE (AMX_FP16, "amx-fp16")
X86_FEATURE (CMPCCXADD, "cmpccxadd")
X86_FEATURE (AVXVNNI, "avxvnni")
X86_FEATURE (AVXIFMA, "avxifma")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);

bool HasLeafD = MaxLevel >= 0xd &&
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ constexpr FeatureBitset ImpliedFeaturesHRESET = {};
constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
constexpr FeatureBitset ImpliedFeaturesCMPCCXADD = {};
constexpr FeatureBitset ImpliedFeaturesRAOINT = {};
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
"Enable AVX-512 further Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureAVXIFMA : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
"Enable AVX-IFMA",
[FeatureAVX2]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86InstrFoldTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4103,12 +4103,16 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 },
{ X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 },
{ X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 },
{ X86::VPMADD52HUQYrr, X86::VPMADD52HUQYrm, 0 },
{ X86::VPMADD52HUQZ128r, X86::VPMADD52HUQZ128m, 0 },
{ X86::VPMADD52HUQZ256r, X86::VPMADD52HUQZ256m, 0 },
{ X86::VPMADD52HUQZr, X86::VPMADD52HUQZm, 0 },
{ X86::VPMADD52HUQrr, X86::VPMADD52HUQrm, 0 },
{ X86::VPMADD52LUQYrr, X86::VPMADD52LUQYrm, 0 },
{ X86::VPMADD52LUQZ128r, X86::VPMADD52LUQZ128m, 0 },
{ X86::VPMADD52LUQZ256r, X86::VPMADD52LUQZ256m, 0 },
{ X86::VPMADD52LUQZr, X86::VPMADD52LUQZm, 0 },
{ X86::VPMADD52LUQrr, X86::VPMADD52LUQrm, 0 },
{ X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
{ X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
{ X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2573,6 +2573,8 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPDPWSSDSZr:
case X86::VPDPWSSDSZrk:
case X86::VPDPWSSDSZrkz:
case X86::VPMADD52HUQrr:
case X86::VPMADD52HUQYrr:
case X86::VPMADD52HUQZ128r:
case X86::VPMADD52HUQZ128rk:
case X86::VPMADD52HUQZ128rkz:
Expand All @@ -2582,6 +2584,8 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPMADD52HUQZr:
case X86::VPMADD52HUQZrk:
case X86::VPMADD52HUQZrkz:
case X86::VPMADD52LUQrr:
case X86::VPMADD52LUQYrr:
case X86::VPMADD52LUQZ128r:
case X86::VPMADD52LUQZ128rk:
case X86::VPMADD52LUQZ128rkz:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,8 @@ def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
def HasVBMI : Predicate<"Subtarget->hasVBMI()">;
def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
def HasIFMA : Predicate<"Subtarget->hasIFMA()">;
def HasAVXIFMA : Predicate<"Subtarget->hasAVXIFMA()">;
def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
Expand Down
37 changes: 37 additions & 0 deletions llvm/lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -8123,3 +8123,40 @@ let isCommutable = 0 in {
X86GF2P8affineqb>, TAPD;
}

let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
checkVEXPredicate = 1 in
multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
// NOTE: The SDNode has the multiply operands first, with the addend last.
// This enables commuted load patterns to be autogenerated by tablegen.
let isCommutable = 1 in {
def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
VR128:$src3, VR128:$src1)))]>,
VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
}
def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
(loadv2i64 addr:$src3), VR128:$src1)))]>,
VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
let isCommutable = 1 in {
def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
VR256:$src3, VR256:$src1)))]>,
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}
def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
(loadv4i64 addr:$src3), VR256:$src1)))]>,
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}

defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix;
defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix;
69 changes: 69 additions & 0 deletions llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxifma --show-mc-encoding | FileCheck %s --check-prefix=AVXIFMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma --show-mc-encoding | FileCheck %s --check-prefix=AVXIFMA
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxifma,+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefix=AVX512IFMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma,+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefix=AVX512IFMA

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64>@test_int_x86_avx_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVXIFMA-LABEL: test_int_x86_avx_vpmadd52h_uq_128:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xb5,0xc2]
; AVXIFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512IFMA-LABEL: test_int_x86_avx_vpmadd52h_uq_128:
; AVX512IFMA: # %bb.0:
; AVX512IFMA-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xb5,0xc2]
; AVX512IFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <4 x i64>@test_int_x86_avx_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; AVXIFMA-LABEL: test_int_x86_avx_vpmadd52h_uq_256:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: {vex} vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xb5,0xc2]
; AVXIFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512IFMA-LABEL: test_int_x86_avx_vpmadd52h_uq_256:
; AVX512IFMA: # %bb.0:
; AVX512IFMA-NEXT: {vex} vpmadd52huq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xb5,0xc2]
; AVX512IFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)

define <2 x i64>@test_int_x86_avx_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVXIFMA-LABEL: test_int_x86_avx_vpmadd52l_uq_128:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xb4,0xc2]
; AVXIFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512IFMA-LABEL: test_int_x86_avx_vpmadd52l_uq_128:
; AVX512IFMA: # %bb.0:
; AVX512IFMA-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xb4,0xc2]
; AVX512IFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <4 x i64>@test_int_x86_avx_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; AVXIFMA-LABEL: test_int_x86_avx_vpmadd52l_uq_256:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xb4,0xc2]
; AVXIFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512IFMA-LABEL: test_int_x86_avx_vpmadd52l_uq_256:
; AVX512IFMA: # %bb.0:
; AVX512IFMA-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xb4,0xc2]
; AVX512IFMA-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
ret <4 x i64> %res
}
217 changes: 217 additions & 0 deletions llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl < %s | FileCheck %s

declare <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)
declare <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @stack_fold_vpmadd52huq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2)
ret <8 x i64> %2
}

define <8 x i64> @stack_fold_vpmadd52huq_commuted(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <8 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %a0, <8 x i64> %a2, <8 x i64> %a1)
ret <8 x i64> %2
}

define <8 x i64> @stack_fold_vpmadd52huq_mask(ptr %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: stack_fold_vpmadd52huq_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <8 x i64>, ptr %a0
%3 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %2, <8 x i64> %a1, <8 x i64> %a2)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %2
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52huq_mask_commuted(ptr %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: stack_fold_vpmadd52huq_mask_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <8 x i64>, ptr %a0
%3 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %2, <8 x i64> %a2, <8 x i64> %a1)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %2
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52huq_maskz(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, ptr %mask) {
; CHECK-LABEL: stack_fold_vpmadd52huq_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2)
%3 = load i8, ptr %mask
%4 = bitcast i8 %3 to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %2, <8 x i64> zeroinitializer
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52huq_maskz_commuted(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, ptr %mask) {
; CHECK-LABEL: stack_fold_vpmadd52huq_maskz_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %a0, <8 x i64> %a2, <8 x i64> %a1)
%3 = load i8, ptr %mask
%4 = bitcast i8 %3 to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %2, <8 x i64> zeroinitializer
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52luq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2)
ret <8 x i64> %2
}

define <8 x i64> @stack_fold_vpmadd52luq_commuted(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <8 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %a0, <8 x i64> %a2, <8 x i64> %a1)
ret <8 x i64> %2
}

define <8 x i64> @stack_fold_vpmadd52luq_mask(ptr %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: stack_fold_vpmadd52luq_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <8 x i64>, ptr %a0
%3 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %2, <8 x i64> %a1, <8 x i64> %a2)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %2
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52luq_mask_commuted(ptr %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; CHECK-LABEL: stack_fold_vpmadd52luq_mask_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm2 {%k1} # 64-byte Folded Reload
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = load <8 x i64>, ptr %a0
%3 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %2, <8 x i64> %a2, <8 x i64> %a1)
%4 = bitcast i8 %mask to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %3, <8 x i64> %2
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52luq_maskz(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, ptr %mask) {
; CHECK-LABEL: stack_fold_vpmadd52luq_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2)
%3 = load i8, ptr %mask
%4 = bitcast i8 %3 to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %2, <8 x i64> zeroinitializer
ret <8 x i64> %5
}

define <8 x i64> @stack_fold_vpmadd52luq_maskz_commuted(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, ptr %mask) {
; CHECK-LABEL: stack_fold_vpmadd52luq_maskz_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %zmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %zmm1, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = call <8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %a0, <8 x i64> %a2, <8 x i64> %a1)
%3 = load i8, ptr %mask
%4 = bitcast i8 %3 to <8 x i1>
%5 = select <8 x i1> %4, <8 x i64> %2, <8 x i64> zeroinitializer
ret <8 x i64> %5
}
119 changes: 119 additions & 0 deletions llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avxifma < %s | FileCheck %s

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

define <2 x i64> @stack_fold_vpmadd52huq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
ret <2 x i64> %2
}

define <2 x i64> @stack_fold_vpmadd52huq_commuted(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %a0, <2 x i64> %a2, <2 x i64> %a1)
ret <2 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52huq_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
ret <4 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52huq_256_commuted(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52huq_256_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52huq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %a0, <4 x i64> %a2, <4 x i64> %a1)
ret <4 x i64> %2
}

define <2 x i64> @stack_fold_vpmadd52luq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
ret <2 x i64> %2
}

define <2 x i64> @stack_fold_vpmadd52luq_commuted(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %a0, <2 x i64> %a2, <2 x i64> %a1)
ret <2 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52luq_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
ret <4 x i64> %2
}

define <4 x i64> @stack_fold_vpmadd52luq_256_commuted(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: stack_fold_vpmadd52luq_256_commuted:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: {vex} vpmadd52luq {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %a0, <4 x i64> %a2, <4 x i64> %a1)
ret <4 x i64> %2
}
# ---- New file: llvm/test/MC/Disassembler/X86/avx-ifma-32.txt (115 lines added) ----
# RUN: llvm-mc --disassemble %s -triple=i686 | FileCheck %s --check-prefixes=ATT
# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL

# ATT: {vex} vpmadd52huq %ymm4, %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymm4
0xc4,0xe2,0xe5,0xb5,0xd4

# ATT: {vex} vpmadd52huq %xmm4, %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmm4
0xc4,0xe2,0xe1,0xb5,0xd4

# ATT: {vex} vpmadd52huq 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0xe5,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52huq 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0xe5,0xb5,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52huq (%eax), %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0xe5,0xb5,0x10

# ATT: {vex} vpmadd52huq -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0xe5,0xb5,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: {vex} vpmadd52huq 4064(%ecx), %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [ecx + 4064]
0xc4,0xe2,0xe5,0xb5,0x91,0xe0,0x0f,0x00,0x00

# ATT: {vex} vpmadd52huq -4096(%edx), %ymm3, %ymm2
# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edx - 4096]
0xc4,0xe2,0xe5,0xb5,0x92,0x00,0xf0,0xff,0xff

# ATT: {vex} vpmadd52huq 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0xe1,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52huq 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0xe1,0xb5,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52huq (%eax), %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0xe1,0xb5,0x10

# ATT: {vex} vpmadd52huq -512(,%ebp,2), %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0xe1,0xb5,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: {vex} vpmadd52huq 2032(%ecx), %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [ecx + 2032]
0xc4,0xe2,0xe1,0xb5,0x91,0xf0,0x07,0x00,0x00

# ATT: {vex} vpmadd52huq -2048(%edx), %xmm3, %xmm2
# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edx - 2048]
0xc4,0xe2,0xe1,0xb5,0x92,0x00,0xf8,0xff,0xff

# ATT: {vex} vpmadd52luq %ymm4, %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymm4
0xc4,0xe2,0xe5,0xb4,0xd4

# ATT: {vex} vpmadd52luq %xmm4, %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmm4
0xc4,0xe2,0xe1,0xb4,0xd4

# ATT: {vex} vpmadd52luq 268435456(%esp,%esi,8), %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0xe5,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52luq 291(%edi,%eax,4), %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0xe5,0xb4,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52luq (%eax), %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [eax]
0xc4,0xe2,0xe5,0xb4,0x10

# ATT: {vex} vpmadd52luq -1024(,%ebp,2), %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
0xc4,0xe2,0xe5,0xb4,0x14,0x6d,0x00,0xfc,0xff,0xff

# ATT: {vex} vpmadd52luq 4064(%ecx), %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [ecx + 4064]
0xc4,0xe2,0xe5,0xb4,0x91,0xe0,0x0f,0x00,0x00

# ATT: {vex} vpmadd52luq -4096(%edx), %ymm3, %ymm2
# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edx - 4096]
0xc4,0xe2,0xe5,0xb4,0x92,0x00,0xf0,0xff,0xff

# ATT: {vex} vpmadd52luq 268435456(%esp,%esi,8), %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
0xc4,0xe2,0xe1,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52luq 291(%edi,%eax,4), %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
0xc4,0xe2,0xe1,0xb4,0x94,0x87,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52luq (%eax), %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [eax]
0xc4,0xe2,0xe1,0xb4,0x10

# ATT: {vex} vpmadd52luq -512(,%ebp,2), %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [2*ebp - 512]
0xc4,0xe2,0xe1,0xb4,0x14,0x6d,0x00,0xfe,0xff,0xff

# ATT: {vex} vpmadd52luq 2032(%ecx), %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [ecx + 2032]
0xc4,0xe2,0xe1,0xb4,0x91,0xf0,0x07,0x00,0x00

# ATT: {vex} vpmadd52luq -2048(%edx), %xmm3, %xmm2
# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edx - 2048]
0xc4,0xe2,0xe1,0xb4,0x92,0x00,0xf8,0xff,0xff

# ---- New file: llvm/test/MC/Disassembler/X86/avx-ifma-64.txt (115 lines added) ----
# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL

# ATT: {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymm14
0xc4,0x42,0x95,0xb5,0xe6

# ATT: {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmm14
0xc4,0x42,0x91,0xb5,0xe6

# ATT: {vex} vpmadd52huq 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52huq 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52huq (%rip), %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00

# ATT: {vex} vpmadd52huq -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: {vex} vpmadd52huq 4064(%rcx), %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00

# ATT: {vex} vpmadd52huq -4096(%rdx), %ymm13, %ymm12
# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff

# ATT: {vex} vpmadd52huq 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52huq 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52huq (%rip), %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00

# ATT: {vex} vpmadd52huq -512(,%rbp,2), %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: {vex} vpmadd52huq 2032(%rcx), %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00

# ATT: {vex} vpmadd52huq -2048(%rdx), %xmm13, %xmm12
# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff

# ATT: {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymm14
0xc4,0x42,0x95,0xb4,0xe6

# ATT: {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmm14
0xc4,0x42,0x91,0xb4,0xe6

# ATT: {vex} vpmadd52luq 268435456(%rbp,%r14,8), %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52luq 291(%r8,%rax,4), %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x95,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52luq (%rip), %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rip]
0xc4,0x62,0x95,0xb4,0x25,0x00,0x00,0x00,0x00

# ATT: {vex} vpmadd52luq -1024(,%rbp,2), %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
0xc4,0x62,0x95,0xb4,0x24,0x6d,0x00,0xfc,0xff,0xff

# ATT: {vex} vpmadd52luq 4064(%rcx), %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rcx + 4064]
0xc4,0x62,0x95,0xb4,0xa1,0xe0,0x0f,0x00,0x00

# ATT: {vex} vpmadd52luq -4096(%rdx), %ymm13, %ymm12
# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rdx - 4096]
0xc4,0x62,0x95,0xb4,0xa2,0x00,0xf0,0xff,0xff

# ATT: {vex} vpmadd52luq 268435456(%rbp,%r14,8), %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
0xc4,0x22,0x91,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10

# ATT: {vex} vpmadd52luq 291(%r8,%rax,4), %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
0xc4,0x42,0x91,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00

# ATT: {vex} vpmadd52luq (%rip), %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rip]
0xc4,0x62,0x91,0xb4,0x25,0x00,0x00,0x00,0x00

# ATT: {vex} vpmadd52luq -512(,%rbp,2), %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [2*rbp - 512]
0xc4,0x62,0x91,0xb4,0x24,0x6d,0x00,0xfe,0xff,0xff

# ATT: {vex} vpmadd52luq 2032(%rcx), %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rcx + 2032]
0xc4,0x62,0x91,0xb4,0xa1,0xf0,0x07,0x00,0x00

# ATT: {vex} vpmadd52luq -2048(%rdx), %xmm13, %xmm12
# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rdx - 2048]
0xc4,0x62,0x91,0xb4,0xa2,0x00,0xf8,0xff,0xff

// ---- New file: llvm/test/MC/X86/avx-ifma-att-32.s (114 lines added) ----
// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxifma --show-encoding %s | FileCheck %s

// CHECK: {vex} vpmadd52huq %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0xd4]
{vex} vpmadd52huq %ymm4, %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0xd4]
{vex} vpmadd52huq %xmm4, %xmm3, %xmm2

// CHECK: {vex} vpmadd52huq 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x10]
{vex} vpmadd52huq (%eax), %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x14,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52huq -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq 4064(%ecx), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x91,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52huq 4064(%ecx), %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq -4096(%edx), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x92,0x00,0xf0,0xff,0xff]
{vex} vpmadd52huq -4096(%edx), %ymm3, %ymm2

// CHECK: {vex} vpmadd52huq 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: {vex} vpmadd52huq 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: {vex} vpmadd52huq (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x10]
{vex} vpmadd52huq (%eax), %xmm3, %xmm2

// CHECK: {vex} vpmadd52huq -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x14,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52huq -512(,%ebp,2), %xmm3, %xmm2

// CHECK: {vex} vpmadd52huq 2032(%ecx), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x91,0xf0,0x07,0x00,0x00]
{vex} vpmadd52huq 2032(%ecx), %xmm3, %xmm2

// CHECK: {vex} vpmadd52huq -2048(%edx), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x92,0x00,0xf8,0xff,0xff]
{vex} vpmadd52huq -2048(%edx), %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq %ymm4, %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0xd4]
{vex} vpmadd52luq %ymm4, %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq %xmm4, %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0xd4]
{vex} vpmadd52luq %xmm4, %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq 268435456(%esp,%esi,8), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq 268435456(%esp,%esi,8), %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq 291(%edi,%eax,4), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq 291(%edi,%eax,4), %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq (%eax), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x10]
{vex} vpmadd52luq (%eax), %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq -1024(,%ebp,2), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x14,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52luq -1024(,%ebp,2), %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq 4064(%ecx), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x91,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52luq 4064(%ecx), %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq -4096(%edx), %ymm3, %ymm2
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x92,0x00,0xf0,0xff,0xff]
{vex} vpmadd52luq -4096(%edx), %ymm3, %ymm2

// CHECK: {vex} vpmadd52luq 268435456(%esp,%esi,8), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq 268435456(%esp,%esi,8), %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq 291(%edi,%eax,4), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq 291(%edi,%eax,4), %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq (%eax), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x10]
{vex} vpmadd52luq (%eax), %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq -512(,%ebp,2), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x14,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52luq -512(,%ebp,2), %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq 2032(%ecx), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x91,0xf0,0x07,0x00,0x00]
{vex} vpmadd52luq 2032(%ecx), %xmm3, %xmm2

// CHECK: {vex} vpmadd52luq -2048(%edx), %xmm3, %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x92,0x00,0xf8,0xff,0xff]
{vex} vpmadd52luq -2048(%edx), %xmm3, %xmm2

// ---- New file: llvm/test/MC/X86/avx-ifma-att-64.s (114 lines added) ----
// RUN: llvm-mc -triple=x86_64-unknown-unknown -mattr=+avxifma --show-encoding < %s | FileCheck %s

// CHECK: {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
{vex} vpmadd52huq %ymm14, %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
{vex} vpmadd52huq %xmm14, %xmm13, %xmm12

// CHECK: {vex} vpmadd52huq 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52huq (%rip), %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52huq -1024(,%rbp,2), %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq 4064(%rcx), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52huq 4064(%rcx), %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq -4096(%rdx), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
{vex} vpmadd52huq -4096(%rdx), %ymm13, %ymm12

// CHECK: {vex} vpmadd52huq 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: {vex} vpmadd52huq 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: {vex} vpmadd52huq (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52huq (%rip), %xmm13, %xmm12

// CHECK: {vex} vpmadd52huq -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52huq -512(,%rbp,2), %xmm13, %xmm12

// CHECK: {vex} vpmadd52huq 2032(%rcx), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
{vex} vpmadd52huq 2032(%rcx), %xmm13, %xmm12

// CHECK: {vex} vpmadd52huq -2048(%rdx), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
{vex} vpmadd52huq -2048(%rdx), %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
{vex} vpmadd52luq %ymm14, %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
{vex} vpmadd52luq %xmm14, %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq 268435456(%rbp,%r14,8), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq 268435456(%rbp,%r14,8), %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq 291(%r8,%rax,4), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq 291(%r8,%rax,4), %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq (%rip), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52luq (%rip), %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq -1024(,%rbp,2), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x24,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52luq -1024(,%rbp,2), %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq 4064(%rcx), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa1,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52luq 4064(%rcx), %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq -4096(%rdx), %ymm13, %ymm12
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa2,0x00,0xf0,0xff,0xff]
{vex} vpmadd52luq -4096(%rdx), %ymm13, %ymm12

// CHECK: {vex} vpmadd52luq 268435456(%rbp,%r14,8), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x22,0x91,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq 268435456(%rbp,%r14,8), %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq 291(%r8,%rax,4), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq 291(%r8,%rax,4), %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq (%rip), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52luq (%rip), %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq -512(,%rbp,2), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x24,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52luq -512(,%rbp,2), %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq 2032(%rcx), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa1,0xf0,0x07,0x00,0x00]
{vex} vpmadd52luq 2032(%rcx), %xmm13, %xmm12

// CHECK: {vex} vpmadd52luq -2048(%rdx), %xmm13, %xmm12
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa2,0x00,0xf8,0xff,0xff]
{vex} vpmadd52luq -2048(%rdx), %xmm13, %xmm12

// ---- New file: llvm/test/MC/X86/avx-ifma-intel-32.s (114 lines added) ----
// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0xd4]
{vex} vpmadd52huq ymm2, ymm3, ymm4

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0xd4]
{vex} vpmadd52huq xmm2, xmm3, xmm4

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x10]
{vex} vpmadd52huq ymm2, ymm3, ymmword ptr [eax]

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x14,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52huq ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x91,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52huq ymm2, ymm3, ymmword ptr [ecx + 4064]

// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edx - 4096]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x92,0x00,0xf0,0xff,0xff]
{vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edx - 4096]

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x10]
{vex} vpmadd52huq xmm2, xmm3, xmmword ptr [eax]

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x14,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52huq xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x91,0xf0,0x07,0x00,0x00]
{vex} vpmadd52huq xmm2, xmm3, xmmword ptr [ecx + 2032]

// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edx - 2048]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x92,0x00,0xf8,0xff,0xff]
{vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edx - 2048]

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymm4
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0xd4]
{vex} vpmadd52luq ymm2, ymm3, ymm4

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmm4
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0xd4]
{vex} vpmadd52luq xmm2, xmm3, xmm4

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x10]
{vex} vpmadd52luq ymm2, ymm3, ymmword ptr [eax]

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x14,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52luq ymm2, ymm3, ymmword ptr [2*ebp - 1024]

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [ecx + 4064]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x91,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52luq ymm2, ymm3, ymmword ptr [ecx + 4064]

// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edx - 4096]
// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x92,0x00,0xf0,0xff,0xff]
{vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edx - 4096]

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [eax]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x10]
{vex} vpmadd52luq xmm2, xmm3, xmmword ptr [eax]

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [2*ebp - 512]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x14,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52luq xmm2, xmm3, xmmword ptr [2*ebp - 512]

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [ecx + 2032]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x91,0xf0,0x07,0x00,0x00]
{vex} vpmadd52luq xmm2, xmm3, xmmword ptr [ecx + 2032]

// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edx - 2048]
// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x92,0x00,0xf8,0xff,0xff]
{vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edx - 2048]

// ---- New file: llvm/test/MC/X86/avx-ifma-intel-64.s (114 lines added) ----
// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
{vex} vpmadd52huq ymm12, ymm13, ymm14

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
{vex} vpmadd52huq xmm12, xmm13, xmm14

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]

// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
{vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
{vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]

// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
{vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
{vex} vpmadd52luq ymm12, ymm13, ymm14

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
{vex} vpmadd52luq xmm12, xmm13, xmm14

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rip]

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x24,0x6d,0x00,0xfc,0xff,0xff]
{vex} vpmadd52luq ymm12, ymm13, ymmword ptr [2*rbp - 1024]

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rcx + 4064]
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa1,0xe0,0x0f,0x00,0x00]
{vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rcx + 4064]

// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rdx - 4096]
// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa2,0x00,0xf0,0xff,0xff]
{vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rdx - 4096]

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
// CHECK: encoding: [0xc4,0x22,0x91,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
{vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
{vex} vpmadd52luq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rip]
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x25,0x00,0x00,0x00,0x00]
{vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rip]

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [2*rbp - 512]
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x24,0x6d,0x00,0xfe,0xff,0xff]
{vex} vpmadd52luq xmm12, xmm13, xmmword ptr [2*rbp - 512]

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rcx + 2032]
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa1,0xf0,0x07,0x00,0x00]
{vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rcx + 2032]

// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rdx - 2048]
// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa2,0x00,0xf8,0xff,0xff]
{vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rdx - 2048]