-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[llvm-mca][x86] Ensure avxvnni tests actually test the avxvnni instructions #157892
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Noticed while checking #97271 - discovered we weren't actually testing the vex variants of the vnni instructions in the avxvnni mca tests. Fixing this causes the znver4 results to break, because it turns out we didn't have consistent instruction naming for the avx and avx512 variants, breaking the regex matching. So add the missing reg operand to the avx512 vnni instruction signatures to match avx vnni. Patch is 68.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157892.diff 9 Files Affected:
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 3401f6f04800e..b8f299965faa3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12404,14 +12404,14 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
bit IsCommutable> {
let ExeDomain = VTI.ExeDomain in {
- defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
+ defm rr : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1,
VTI.RC:$src2, VTI.RC:$src3)),
IsCommutable, IsCommutable>,
EVEX, VVVV, T8, Sched<[sched]>;
- defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ defm rm : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
@@ -12419,7 +12419,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
- defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ defm rmb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
@@ -12459,24 +12459,24 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul
let Predicates = [HasVNNI] in {
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, VR512:$src3))),
- (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
+ (VPDPWSSDZrr VR512:$src1, VR512:$src2, VR512:$src3)>;
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
- (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
+ (VPDPWSSDZrm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
- (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
+ (VPDPWSSDZ256rr VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
- (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
+ (VPDPWSSDZ256rm VR256X:$src1, VR256X:$src2, addr:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
- (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
+ (VPDPWSSDZ128rr VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
- (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
+ (VPDPWSSDZ128rm VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a68edf4d2b7ee..1f6915929646a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2939,78 +2939,78 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPDPBUUDSYrr:
case X86::VPDPBUUDrr:
case X86::VPDPBUUDYrr:
- case X86::VPDPBSSDSZ128r:
- case X86::VPDPBSSDSZ128rk:
- case X86::VPDPBSSDSZ128rkz:
- case X86::VPDPBSSDSZ256r:
- case X86::VPDPBSSDSZ256rk:
- case X86::VPDPBSSDSZ256rkz:
- case X86::VPDPBSSDSZr:
- case X86::VPDPBSSDSZrk:
- case X86::VPDPBSSDSZrkz:
- case X86::VPDPBSSDZ128r:
- case X86::VPDPBSSDZ128rk:
- case X86::VPDPBSSDZ128rkz:
- case X86::VPDPBSSDZ256r:
- case X86::VPDPBSSDZ256rk:
- case X86::VPDPBSSDZ256rkz:
- case X86::VPDPBSSDZr:
- case X86::VPDPBSSDZrk:
- case X86::VPDPBSSDZrkz:
- case X86::VPDPBUUDSZ128r:
- case X86::VPDPBUUDSZ128rk:
- case X86::VPDPBUUDSZ128rkz:
- case X86::VPDPBUUDSZ256r:
- case X86::VPDPBUUDSZ256rk:
- case X86::VPDPBUUDSZ256rkz:
- case X86::VPDPBUUDSZr:
- case X86::VPDPBUUDSZrk:
- case X86::VPDPBUUDSZrkz:
- case X86::VPDPBUUDZ128r:
- case X86::VPDPBUUDZ128rk:
- case X86::VPDPBUUDZ128rkz:
- case X86::VPDPBUUDZ256r:
- case X86::VPDPBUUDZ256rk:
- case X86::VPDPBUUDZ256rkz:
- case X86::VPDPBUUDZr:
- case X86::VPDPBUUDZrk:
- case X86::VPDPBUUDZrkz:
- case X86::VPDPWSSDZ128r:
- case X86::VPDPWSSDZ128rk:
- case X86::VPDPWSSDZ128rkz:
- case X86::VPDPWSSDZ256r:
- case X86::VPDPWSSDZ256rk:
- case X86::VPDPWSSDZ256rkz:
- case X86::VPDPWSSDZr:
- case X86::VPDPWSSDZrk:
- case X86::VPDPWSSDZrkz:
- case X86::VPDPWSSDSZ128r:
- case X86::VPDPWSSDSZ128rk:
- case X86::VPDPWSSDSZ128rkz:
- case X86::VPDPWSSDSZ256r:
- case X86::VPDPWSSDSZ256rk:
- case X86::VPDPWSSDSZ256rkz:
- case X86::VPDPWSSDSZr:
- case X86::VPDPWSSDSZrk:
- case X86::VPDPWSSDSZrkz:
- case X86::VPDPWUUDZ128r:
- case X86::VPDPWUUDZ128rk:
- case X86::VPDPWUUDZ128rkz:
- case X86::VPDPWUUDZ256r:
- case X86::VPDPWUUDZ256rk:
- case X86::VPDPWUUDZ256rkz:
- case X86::VPDPWUUDZr:
- case X86::VPDPWUUDZrk:
- case X86::VPDPWUUDZrkz:
- case X86::VPDPWUUDSZ128r:
- case X86::VPDPWUUDSZ128rk:
- case X86::VPDPWUUDSZ128rkz:
- case X86::VPDPWUUDSZ256r:
- case X86::VPDPWUUDSZ256rk:
- case X86::VPDPWUUDSZ256rkz:
- case X86::VPDPWUUDSZr:
- case X86::VPDPWUUDSZrk:
- case X86::VPDPWUUDSZrkz:
+ case X86::VPDPBSSDSZ128rr:
+ case X86::VPDPBSSDSZ128rrk:
+ case X86::VPDPBSSDSZ128rrkz:
+ case X86::VPDPBSSDSZ256rr:
+ case X86::VPDPBSSDSZ256rrk:
+ case X86::VPDPBSSDSZ256rrkz:
+ case X86::VPDPBSSDSZrr:
+ case X86::VPDPBSSDSZrrk:
+ case X86::VPDPBSSDSZrrkz:
+ case X86::VPDPBSSDZ128rr:
+ case X86::VPDPBSSDZ128rrk:
+ case X86::VPDPBSSDZ128rrkz:
+ case X86::VPDPBSSDZ256rr:
+ case X86::VPDPBSSDZ256rrk:
+ case X86::VPDPBSSDZ256rrkz:
+ case X86::VPDPBSSDZrr:
+ case X86::VPDPBSSDZrrk:
+ case X86::VPDPBSSDZrrkz:
+ case X86::VPDPBUUDSZ128rr:
+ case X86::VPDPBUUDSZ128rrk:
+ case X86::VPDPBUUDSZ128rrkz:
+ case X86::VPDPBUUDSZ256rr:
+ case X86::VPDPBUUDSZ256rrk:
+ case X86::VPDPBUUDSZ256rrkz:
+ case X86::VPDPBUUDSZrr:
+ case X86::VPDPBUUDSZrrk:
+ case X86::VPDPBUUDSZrrkz:
+ case X86::VPDPBUUDZ128rr:
+ case X86::VPDPBUUDZ128rrk:
+ case X86::VPDPBUUDZ128rrkz:
+ case X86::VPDPBUUDZ256rr:
+ case X86::VPDPBUUDZ256rrk:
+ case X86::VPDPBUUDZ256rrkz:
+ case X86::VPDPBUUDZrr:
+ case X86::VPDPBUUDZrrk:
+ case X86::VPDPBUUDZrrkz:
+ case X86::VPDPWSSDZ128rr:
+ case X86::VPDPWSSDZ128rrk:
+ case X86::VPDPWSSDZ128rrkz:
+ case X86::VPDPWSSDZ256rr:
+ case X86::VPDPWSSDZ256rrk:
+ case X86::VPDPWSSDZ256rrkz:
+ case X86::VPDPWSSDZrr:
+ case X86::VPDPWSSDZrrk:
+ case X86::VPDPWSSDZrrkz:
+ case X86::VPDPWSSDSZ128rr:
+ case X86::VPDPWSSDSZ128rrk:
+ case X86::VPDPWSSDSZ128rrkz:
+ case X86::VPDPWSSDSZ256rr:
+ case X86::VPDPWSSDSZ256rrk:
+ case X86::VPDPWSSDSZ256rrkz:
+ case X86::VPDPWSSDSZrr:
+ case X86::VPDPWSSDSZrrk:
+ case X86::VPDPWSSDSZrrkz:
+ case X86::VPDPWUUDZ128rr:
+ case X86::VPDPWUUDZ128rrk:
+ case X86::VPDPWUUDZ128rrkz:
+ case X86::VPDPWUUDZ256rr:
+ case X86::VPDPWUUDZ256rrk:
+ case X86::VPDPWUUDZ256rrkz:
+ case X86::VPDPWUUDZrr:
+ case X86::VPDPWUUDZrrk:
+ case X86::VPDPWUUDZrrkz:
+ case X86::VPDPWUUDSZ128rr:
+ case X86::VPDPWUUDSZ128rrk:
+ case X86::VPDPWUUDSZ128rrkz:
+ case X86::VPDPWUUDSZ256rr:
+ case X86::VPDPWUUDSZ256rrk:
+ case X86::VPDPWUUDSZ256rrkz:
+ case X86::VPDPWUUDSZrr:
+ case X86::VPDPWUUDSZrrk:
+ case X86::VPDPWUUDSZrrkz:
case X86::VPMADD52HUQrr:
case X86::VPMADD52HUQYrr:
case X86::VPMADD52HUQZ128r:
@@ -10822,12 +10822,12 @@ bool X86InstrInfo::getMachineCombinerPatterns(
}
break;
}
- case X86::VPDPWSSDZ128r:
- case X86::VPDPWSSDZ128m:
- case X86::VPDPWSSDZ256r:
- case X86::VPDPWSSDZ256m:
- case X86::VPDPWSSDZr:
- case X86::VPDPWSSDZm: {
+ case X86::VPDPWSSDZ128rr:
+ case X86::VPDPWSSDZ128rm:
+ case X86::VPDPWSSDZ256rr:
+ case X86::VPDPWSSDZ256rm:
+ case X86::VPDPWSSDZrr:
+ case X86::VPDPWSSDZrm: {
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
return true;
@@ -10866,11 +10866,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
MaddOpc = X86::VPMADDWDrm;
AddOpc = X86::VPADDDrr;
break;
- case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ128rr:
MaddOpc = X86::VPMADDWDZ128rr;
AddOpc = X86::VPADDDZ128rr;
break;
- case X86::VPDPWSSDZ128m:
+ case X86::VPDPWSSDZ128rm:
MaddOpc = X86::VPMADDWDZ128rm;
AddOpc = X86::VPADDDZ128rr;
break;
@@ -10886,11 +10886,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
MaddOpc = X86::VPMADDWDYrm;
AddOpc = X86::VPADDDYrr;
break;
- case X86::VPDPWSSDZ256r:
+ case X86::VPDPWSSDZ256rr:
MaddOpc = X86::VPMADDWDZ256rr;
AddOpc = X86::VPADDDZ256rr;
break;
- case X86::VPDPWSSDZ256m:
+ case X86::VPDPWSSDZ256rm:
MaddOpc = X86::VPMADDWDZ256rm;
AddOpc = X86::VPADDDZ256rr;
break;
@@ -10898,11 +10898,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
// -->
// vpmaddwd zmm3,zmm3,zmm1
// vpaddd zmm2,zmm2,zmm3
- case X86::VPDPWSSDZr:
+ case X86::VPDPWSSDZrr:
MaddOpc = X86::VPMADDWDZrr;
AddOpc = X86::VPADDDZrr;
break;
- case X86::VPDPWSSDZm:
+ case X86::VPDPWSSDZrm:
MaddOpc = X86::VPMADDWDZrm;
AddOpc = X86::VPADDDZrr;
break;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index a93c7e3a82f17..cc300548a50e6 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
- "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
+ "VPDP(BU|WS)(S|P)(S|D|DS)(Z?|Z128?|Z256?|Y?)r(r|rk|rkz)",
"VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
)>;
diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s
index 8152d18f56c30..4b73a7fc0e8b8 100644
--- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s
+++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s
@@ -1,29 +1,29 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -instruction-tables < %s | FileCheck %s
-vpdpbusd %xmm0, %xmm1, %xmm2
-vpdpbusd (%rax), %xmm1, %xmm2
+{vex} vpdpbusd %xmm0, %xmm1, %xmm2
+{vex} vpdpbusd (%rax), %xmm1, %xmm2
-vpdpbusd %ymm0, %ymm1, %ymm2
-vpdpbusd (%rax), %ymm1, %ymm2
+{vex} vpdpbusd %ymm0, %ymm1, %ymm2
+{vex} vpdpbusd (%rax), %ymm1, %ymm2
-vpdpbusds %xmm0, %xmm1, %xmm2
-vpdpbusds (%rax), %xmm1, %xmm2
+{vex} vpdpbusds %xmm0, %xmm1, %xmm2
+{vex} vpdpbusds (%rax), %xmm1, %xmm2
-vpdpbusds %ymm0, %ymm1, %ymm2
-vpdpbusds (%rax), %ymm1, %ymm2
+{vex} vpdpbusds %ymm0, %ymm1, %ymm2
+{vex} vpdpbusds (%rax), %ymm1, %ymm2
-vpdpwssd %xmm0, %xmm1, %xmm2
-vpdpwssd (%rax), %xmm1, %xmm2
+{vex} vpdpwssd %xmm0, %xmm1, %xmm2
+{vex} vpdpwssd (%rax), %xmm1, %xmm2
-vpdpwssd %ymm0, %ymm1, %ymm2
-vpdpwssd (%rax), %ymm1, %ymm2
+{vex} vpdpwssd %ymm0, %ymm1, %ymm2
+{vex} vpdpwssd (%rax), %ymm1, %ymm2
-vpdpwssds %xmm0, %xmm1, %xmm2
-vpdpwssds (%rax), %xmm1, %xmm2
+{vex} vpdpwssds %xmm0, %xmm1, %xmm2
+{vex} vpdpwssds (%rax), %xmm1, %xmm2
-vpdpwssds %ymm0, %ymm1, %ymm2
-vpdpwssds (%rax), %ymm1, %ymm2
+{vex} vpdpwssds %ymm0, %ymm1, %ymm2
+{vex} vpdpwssds (%rax), %ymm1, %ymm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -34,22 +34,22 @@ vpdpwssds (%rax), %ymm1, %ymm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 5 0.50 vpdpbusd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpbusd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 5 0.50 vpdpbusds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpbusds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusds (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssds (%rax), %ymm1, %ymm2
# CHECK: Resources:
# CHECK-NEXT: [0] - ADLPPort00
@@ -72,19 +72,19 @@ vpdpwssds (%rax), %ymm1, %ymm2
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpwssd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpwssd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpwssds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
||
vpdpbusd %xmm0, %xmm1, %xmm2 | ||
vpdpbusd (%rax), %xmm1, %xmm2 | ||
{vex} vpdpbusd %xmm0, %xmm1, %xmm2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IceLake doesn't support AVXVNNI instructions. Keep the AVX512 ones.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
deleted
As noted on llvm#157892 - icelake/rocketlake/tigerlake don't support avxvnni (just avx512vnni which we still have tests for)
As noted on #157892 - icelake/rocketlake/tigerlake don't support avxvnni (just avx512vnni which we still have tests for)
…ctions Noticed while checking llvm#97271 - we weren't actually testing the vex variants of the vnni instructions in the avxvnni mca tests. Fixing this causes the znver4 results to change, because it turns out we didn't have consistent instruction naming for the avx and avx512 variants, breaking the regex matching. So add the missing reg operand to the avx512 vnni instruction signatures to match avx vnni.
df9e79e
to
516e8df
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Noticed while checking #97271 - discovered we weren't actually testing the vex variants of the vnni instructions in the avxvnni mca tests.
Fixing this causes the znver4 results to break, because it turns out we didn't have consistent instruction naming for the avx and avx512 variants, breaking the regex matching.
So add the missing reg operand to the avx512 vnni instruction signatures to match avx vnni.