diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d6932234ec32cb..f401b54da014ef 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51665,9 +51665,11 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, if (Op0.getOpcode() == X86ISD::VBROADCAST) return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0)); - // If this scalar/subvector broadcast_load is inserted into both halves, use - // a larger broadcast_load. Update other uses to use an extracted subvector. - if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD || + // If this simple subvector or scalar/subvector broadcast_load is inserted + // into both halves, use a larger broadcast_load. Update other uses to use + // an extracted subvector. + if (ISD::isNormalLoad(Op0.getNode()) || + Op0.getOpcode() == X86ISD::VBROADCAST_LOAD || Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { auto *Mem = cast(Op0); unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD @@ -51682,24 +51684,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, } } - // If this is a simple subvector load repeated across multiple lanes, then - // broadcast the load. Update other uses to use an extracted subvector. - if (auto *Ld = dyn_cast(Op0)) { - if (Ld->isSimple() && !Ld->isNonTemporal() && - Ld->getExtensionType() == ISD::NON_EXTLOAD) { - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; - SDValue BcastLd = - DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, - Ld->getMemoryVT(), Ld->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith( - Op0, - extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits())); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1)); - return BcastLd; - } - } - // concat_vectors(movddup(x),movddup(x)) -> broadcast(x) if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 && (Subtarget.hasAVX2() ||