diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index da8b68c3906b2..e65a29d8f90e3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35146,6 +35146,22 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
       return N; // Return N so it doesn't get rechecked!
     }
 
+    // vbroadcast(vzload X) -> vbroadcast_load X
+    if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
+      MemSDNode *LN = cast<MemSDNode>(Src);
+      if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
+        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+        SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+        SDValue BcastLd =
+            DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+                                    LN->getMemoryVT(), LN->getMemOperand());
+        DCI.CombineTo(N.getNode(), BcastLd);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+        DCI.recursivelyDeleteUnusedNodes(LN);
+        return N; // Return N so it doesn't get rechecked!
+      }
+    }
+
     return SDValue();
   }
   case X86ISD::BLENDI: {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2ed33871ecbd6..f5e06e4b02e6e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1423,19 +1423,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                            AVX5128IBase, EVEX;
 }
 
-let Predicates = [HasAVX512] in {
-  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQZrm addr:$src)>;
-}
-
-let Predicates = [HasVLX] in {
-  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQZ128rm addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQZ256rm addr:$src)>;
-}
 let Predicates = [HasVLX, HasBWI] in {
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
   // This means we'll encounter truncated i32 loads; match that here.
@@ -10873,8 +10860,6 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
           (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
-          (VMOVDDUPZ128rm addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                    (v2f64 VR128X:$src0)),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 73bba723ab969..9659145a495cc 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7523,13 +7523,6 @@ defm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastl
 defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
                                     v2i64, v4i64, NoVLX>;
 
-let Predicates = [HasAVX2, NoVLX] in {
-  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQrm addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQYrm addr:$src)>;
-}
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
   // This means we'll encounter truncated i32 loads; match that here.
@@ -7621,8 +7614,6 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
             (VMOVDDUPrm addr:$src)>;
-  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
-            (VMOVDDUPrm addr:$src)>;
 }
 
 let Predicates = [HasAVX1Only] in {