Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -12404,22 +12404,22 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
bit IsCommutable> {
let ExeDomain = VTI.ExeDomain in {
defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
defm rr : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1,
VTI.RC:$src2, VTI.RC:$src3)),
IsCommutable, IsCommutable>,
EVEX, VVVV, T8, Sched<[sched]>;
defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
defm rm : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
defm rmb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
Expand Down Expand Up @@ -12459,24 +12459,24 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul
let Predicates = [HasVNNI] in {
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, VR512:$src3))),
(VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
(VPDPWSSDZrr VR512:$src1, VR512:$src2, VR512:$src3)>;
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
(VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
(VPDPWSSDZrm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
(VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
(VPDPWSSDZ256rr VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
(VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
(VPDPWSSDZ256rm VR256X:$src1, VR256X:$src2, addr:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
(VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
(VPDPWSSDZ128rr VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
(VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
(VPDPWSSDZ128rm VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
Expand Down
174 changes: 87 additions & 87 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2939,78 +2939,78 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPDPBUUDSYrr:
case X86::VPDPBUUDrr:
case X86::VPDPBUUDYrr:
case X86::VPDPBSSDSZ128r:
case X86::VPDPBSSDSZ128rk:
case X86::VPDPBSSDSZ128rkz:
case X86::VPDPBSSDSZ256r:
case X86::VPDPBSSDSZ256rk:
case X86::VPDPBSSDSZ256rkz:
case X86::VPDPBSSDSZr:
case X86::VPDPBSSDSZrk:
case X86::VPDPBSSDSZrkz:
case X86::VPDPBSSDZ128r:
case X86::VPDPBSSDZ128rk:
case X86::VPDPBSSDZ128rkz:
case X86::VPDPBSSDZ256r:
case X86::VPDPBSSDZ256rk:
case X86::VPDPBSSDZ256rkz:
case X86::VPDPBSSDZr:
case X86::VPDPBSSDZrk:
case X86::VPDPBSSDZrkz:
case X86::VPDPBUUDSZ128r:
case X86::VPDPBUUDSZ128rk:
case X86::VPDPBUUDSZ128rkz:
case X86::VPDPBUUDSZ256r:
case X86::VPDPBUUDSZ256rk:
case X86::VPDPBUUDSZ256rkz:
case X86::VPDPBUUDSZr:
case X86::VPDPBUUDSZrk:
case X86::VPDPBUUDSZrkz:
case X86::VPDPBUUDZ128r:
case X86::VPDPBUUDZ128rk:
case X86::VPDPBUUDZ128rkz:
case X86::VPDPBUUDZ256r:
case X86::VPDPBUUDZ256rk:
case X86::VPDPBUUDZ256rkz:
case X86::VPDPBUUDZr:
case X86::VPDPBUUDZrk:
case X86::VPDPBUUDZrkz:
case X86::VPDPWSSDZ128r:
case X86::VPDPWSSDZ128rk:
case X86::VPDPWSSDZ128rkz:
case X86::VPDPWSSDZ256r:
case X86::VPDPWSSDZ256rk:
case X86::VPDPWSSDZ256rkz:
case X86::VPDPWSSDZr:
case X86::VPDPWSSDZrk:
case X86::VPDPWSSDZrkz:
case X86::VPDPWSSDSZ128r:
case X86::VPDPWSSDSZ128rk:
case X86::VPDPWSSDSZ128rkz:
case X86::VPDPWSSDSZ256r:
case X86::VPDPWSSDSZ256rk:
case X86::VPDPWSSDSZ256rkz:
case X86::VPDPWSSDSZr:
case X86::VPDPWSSDSZrk:
case X86::VPDPWSSDSZrkz:
case X86::VPDPWUUDZ128r:
case X86::VPDPWUUDZ128rk:
case X86::VPDPWUUDZ128rkz:
case X86::VPDPWUUDZ256r:
case X86::VPDPWUUDZ256rk:
case X86::VPDPWUUDZ256rkz:
case X86::VPDPWUUDZr:
case X86::VPDPWUUDZrk:
case X86::VPDPWUUDZrkz:
case X86::VPDPWUUDSZ128r:
case X86::VPDPWUUDSZ128rk:
case X86::VPDPWUUDSZ128rkz:
case X86::VPDPWUUDSZ256r:
case X86::VPDPWUUDSZ256rk:
case X86::VPDPWUUDSZ256rkz:
case X86::VPDPWUUDSZr:
case X86::VPDPWUUDSZrk:
case X86::VPDPWUUDSZrkz:
case X86::VPDPBSSDSZ128rr:
case X86::VPDPBSSDSZ128rrk:
case X86::VPDPBSSDSZ128rrkz:
case X86::VPDPBSSDSZ256rr:
case X86::VPDPBSSDSZ256rrk:
case X86::VPDPBSSDSZ256rrkz:
case X86::VPDPBSSDSZrr:
case X86::VPDPBSSDSZrrk:
case X86::VPDPBSSDSZrrkz:
case X86::VPDPBSSDZ128rr:
case X86::VPDPBSSDZ128rrk:
case X86::VPDPBSSDZ128rrkz:
case X86::VPDPBSSDZ256rr:
case X86::VPDPBSSDZ256rrk:
case X86::VPDPBSSDZ256rrkz:
case X86::VPDPBSSDZrr:
case X86::VPDPBSSDZrrk:
case X86::VPDPBSSDZrrkz:
case X86::VPDPBUUDSZ128rr:
case X86::VPDPBUUDSZ128rrk:
case X86::VPDPBUUDSZ128rrkz:
case X86::VPDPBUUDSZ256rr:
case X86::VPDPBUUDSZ256rrk:
case X86::VPDPBUUDSZ256rrkz:
case X86::VPDPBUUDSZrr:
case X86::VPDPBUUDSZrrk:
case X86::VPDPBUUDSZrrkz:
case X86::VPDPBUUDZ128rr:
case X86::VPDPBUUDZ128rrk:
case X86::VPDPBUUDZ128rrkz:
case X86::VPDPBUUDZ256rr:
case X86::VPDPBUUDZ256rrk:
case X86::VPDPBUUDZ256rrkz:
case X86::VPDPBUUDZrr:
case X86::VPDPBUUDZrrk:
case X86::VPDPBUUDZrrkz:
case X86::VPDPWSSDZ128rr:
case X86::VPDPWSSDZ128rrk:
case X86::VPDPWSSDZ128rrkz:
case X86::VPDPWSSDZ256rr:
case X86::VPDPWSSDZ256rrk:
case X86::VPDPWSSDZ256rrkz:
case X86::VPDPWSSDZrr:
case X86::VPDPWSSDZrrk:
case X86::VPDPWSSDZrrkz:
case X86::VPDPWSSDSZ128rr:
case X86::VPDPWSSDSZ128rrk:
case X86::VPDPWSSDSZ128rrkz:
case X86::VPDPWSSDSZ256rr:
case X86::VPDPWSSDSZ256rrk:
case X86::VPDPWSSDSZ256rrkz:
case X86::VPDPWSSDSZrr:
case X86::VPDPWSSDSZrrk:
case X86::VPDPWSSDSZrrkz:
case X86::VPDPWUUDZ128rr:
case X86::VPDPWUUDZ128rrk:
case X86::VPDPWUUDZ128rrkz:
case X86::VPDPWUUDZ256rr:
case X86::VPDPWUUDZ256rrk:
case X86::VPDPWUUDZ256rrkz:
case X86::VPDPWUUDZrr:
case X86::VPDPWUUDZrrk:
case X86::VPDPWUUDZrrkz:
case X86::VPDPWUUDSZ128rr:
case X86::VPDPWUUDSZ128rrk:
case X86::VPDPWUUDSZ128rrkz:
case X86::VPDPWUUDSZ256rr:
case X86::VPDPWUUDSZ256rrk:
case X86::VPDPWUUDSZ256rrkz:
case X86::VPDPWUUDSZrr:
case X86::VPDPWUUDSZrrk:
case X86::VPDPWUUDSZrrkz:
case X86::VPMADD52HUQrr:
case X86::VPMADD52HUQYrr:
case X86::VPMADD52HUQZ128r:
Expand Down Expand Up @@ -10822,15 +10822,15 @@ bool X86InstrInfo::getMachineCombinerPatterns(
}
break;
}
case X86::VPDPWSSDZ128r:
case X86::VPDPWSSDZ128m:
case X86::VPDPWSSDZ256r:
case X86::VPDPWSSDZ256m:
case X86::VPDPWSSDZr:
case X86::VPDPWSSDZm: {
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
return true;
case X86::VPDPWSSDZ128rr:
case X86::VPDPWSSDZ128rm:
case X86::VPDPWSSDZ256rr:
case X86::VPDPWSSDZ256rm:
case X86::VPDPWSSDZrr:
case X86::VPDPWSSDZrm: {
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
return true;
}
break;
}
Expand Down Expand Up @@ -10866,11 +10866,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
MaddOpc = X86::VPMADDWDrm;
AddOpc = X86::VPADDDrr;
break;
case X86::VPDPWSSDZ128r:
case X86::VPDPWSSDZ128rr:
MaddOpc = X86::VPMADDWDZ128rr;
AddOpc = X86::VPADDDZ128rr;
break;
case X86::VPDPWSSDZ128m:
case X86::VPDPWSSDZ128rm:
MaddOpc = X86::VPMADDWDZ128rm;
AddOpc = X86::VPADDDZ128rr;
break;
Expand All @@ -10886,23 +10886,23 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
MaddOpc = X86::VPMADDWDYrm;
AddOpc = X86::VPADDDYrr;
break;
case X86::VPDPWSSDZ256r:
case X86::VPDPWSSDZ256rr:
MaddOpc = X86::VPMADDWDZ256rr;
AddOpc = X86::VPADDDZ256rr;
break;
case X86::VPDPWSSDZ256m:
case X86::VPDPWSSDZ256rm:
MaddOpc = X86::VPMADDWDZ256rm;
AddOpc = X86::VPADDDZ256rr;
break;
// vpdpwssd zmm2,zmm3,zmm1
// -->
// vpmaddwd zmm3,zmm3,zmm1
// vpaddd zmm2,zmm2,zmm3
case X86::VPDPWSSDZr:
case X86::VPDPWSSDZrr:
MaddOpc = X86::VPMADDWDZrr;
AddOpc = X86::VPADDDZrr;
break;
case X86::VPDPWSSDZm:
case X86::VPDPWSSDZrm:
MaddOpc = X86::VPMADDWDZrm;
AddOpc = X86::VPADDDZrr;
break;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86ScheduleZnver4.td
Original file line number Diff line number Diff line change
Expand Up @@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
"VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
"VPDP(BU|WS)(S|P)(S|D|DS)(Z?|Z128?|Z256?|Y?)r(r|rk|rkz)",
"VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
)>;

Expand Down
Loading