@@ -57970,6 +57970,51 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, const SDLoc &DL,
57970
57970
Cmov.getOperand(3));
57971
57971
}
57972
57972
57973
+ // Attempt to turn ADD(MUL(x, y), acc)) -> VPMADD52L
57974
+ // When upper 12 bits of x, y and MUL(x, y) are known to be 0
57975
+ static SDValue matchVPMADD52(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
57976
+ EVT VT, const X86Subtarget &Subtarget) {
57977
+ using namespace SDPatternMatch;
57978
+ if (!VT.isVector() || VT.getScalarSizeInBits() != 64 ||
57979
+ (!Subtarget.hasAVXIFMA() && !Subtarget.hasIFMA()))
57980
+ return SDValue();
57981
+
57982
+ // Need AVX-512VL vector length extensions if operating on XMM/YMM registers
57983
+ if (!Subtarget.hasAVXIFMA() && !Subtarget.hasVLX() &&
57984
+ VT.getSizeInBits() < 512)
57985
+ return SDValue();
57986
+
57987
+ const auto TotalSize = VT.getSizeInBits();
57988
+ if (TotalSize < 128 || !isPowerOf2_64(TotalSize))
57989
+ return SDValue();
57990
+
57991
+ SDValue X, Y, Acc;
57992
+ if (!sd_match(N, m_Add(m_Mul(m_Value(X), m_Value(Y)), m_Value(Acc))))
57993
+ return SDValue();
57994
+
57995
+ KnownBits KnownX = DAG.computeKnownBits(X);
57996
+ if (KnownX.countMinLeadingZeros() < 12)
57997
+ return SDValue();
57998
+ KnownBits KnownY = DAG.computeKnownBits(Y);
57999
+ if (KnownY.countMinLeadingZeros() < 12)
58000
+ return SDValue();
58001
+ KnownBits KnownMul = KnownBits::mul(KnownX, KnownY);
58002
+ if (KnownMul.countMinLeadingZeros() < 12)
58003
+ return SDValue();
58004
+
58005
+ auto VPMADD52Builder = [](SelectionDAG &G, SDLoc DL,
58006
+ ArrayRef<SDValue> SubOps) {
58007
+ EVT SubVT = SubOps[0].getValueType();
58008
+ assert(SubVT.getScalarSizeInBits() == 64 &&
58009
+ "Unexpected element size, only supports 64bit size");
58010
+ return G.getNode(X86ISD::VPMADD52L, DL, SubVT, SubOps[1] /*X*/,
58011
+ SubOps[2] /*Y*/, SubOps[0] /*Acc*/);
58012
+ };
58013
+
58014
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Acc, X, Y}, VPMADD52Builder,
58015
+ /*CheckBWI*/ false);
58016
+ }
58017
+
57973
58018
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
57974
58019
TargetLowering::DAGCombinerInfo &DCI,
57975
58020
const X86Subtarget &Subtarget) {
@@ -58073,6 +58118,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
58073
58118
Op0.getOperand(0), Op0.getOperand(2));
58074
58119
}
58075
58120
58121
+ if (SDValue IFMA52 = matchVPMADD52(N, DAG, DL, VT, Subtarget))
58122
+ return IFMA52;
58123
+
58076
58124
return combineAddOrSubToADCOrSBB(N, DL, DAG);
58077
58125
}
58078
58126
0 commit comments