diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 0bff1884933d8..e5f07f230fe6c 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -48,26 +48,25 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo) const { - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - const X86Subtarget &Subtarget = - DAG.getMachineFunction().getSubtarget<X86Subtarget>(); + // If to a segment-relative address space, use the default lowering. + if (DstPtrInfo.getAddrSpace() >= 256) + return SDValue(); -#ifndef NDEBUG // If the base register might conflict with our physical registers, bail out. const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI, X86::ECX, X86::EAX, X86::EDI}; - assert(!isBaseRegConflictPossible(DAG, ClobberSet)); -#endif - - // If to a segment-relative address space, use the default lowering. - if (DstPtrInfo.getAddrSpace() >= 256) + if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + const X86Subtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<X86Subtarget>(); + // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. 
if (Alignment < Align(4) || !ConstantSize || - ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) + ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); @@ -128,26 +127,29 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( InGlue = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - - if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT AddrVT = Dst.getValueType(); - EVT SizeVT = Size.getValueType(); - - Chain = - DAG.getMemset(Chain, dl, - DAG.getNode(ISD::ADD, dl, AddrVT, Dst, - DAG.getConstant(Offset, dl, AddrVT)), - Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, - isVolatile, AlwaysInline, - /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset)); - } + SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue}; + SDValue RepStos = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. - return Chain; + /// RepStos can process the whole length. + if (BytesLeft == 0) + return RepStos; + + // Handle the last 1 - 7 bytes. + SmallVector<SDValue, 4> Results; + Results.push_back(RepStos); + unsigned Offset = SizeVal - BytesLeft; + EVT AddrVT = Dst.getValueType(); + EVT SizeVT = Size.getValueType(); + + Results.push_back( + DAG.getMemset(Chain, dl, + DAG.getNode(ISD::ADD, dl, AddrVT, Dst, + DAG.getConstant(Offset, dl, AddrVT)), + Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, + isVolatile, AlwaysInline, + /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset))); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); } /// Emit a single REP MOVS{B,W,D,Q} instruction.