[GISel] Support llvm.memcpy.inline
Differential revision: https://reviews.llvm.org/D105072
jroelofs committed Jun 30, 2021
1 parent 2eb7bbb commit a642872
Showing 20 changed files with 613 additions and 23 deletions.
30 changes: 30 additions & 0 deletions llvm/docs/GlobalISel/GenericOpcode.rst
@@ -715,6 +715,36 @@ G_FENCE

I couldn't find any documentation on this at the time of writing.

G_MEMCPY
^^^^^^^^

Generic memcpy. Expects two MachineMemOperands covering the store and load
respectively, in addition to explicit operands.

G_MEMCPY_INLINE
^^^^^^^^^^^^^^^

Generic inlined memcpy. Like G_MEMCPY, but it is guaranteed that this version
will not be lowered as a call to an external function. Currently the size
operand is required to evaluate as a constant (not an immediate), though that is
expected to change when llvm.memcpy.inline is taught to support dynamic sizes.

G_MEMMOVE
^^^^^^^^^

Generic memmove. Similar to G_MEMCPY, but the source and destination memory
ranges are allowed to overlap.

G_MEMSET
^^^^^^^^

Generic memset. Expects a MachineMemOperand in addition to explicit operands.

G_BZERO
^^^^^^^

Generic bzero. Expects a MachineMemOperand in addition to explicit operands.

Control Flow
------------

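To make the new opcode's shape concrete, here is a minimal sketch of emitting a G_MEMCPY_INLINE through MachineIRBuilder. This is not code from the commit: the helper name, registers, and MachinePointerInfo values are illustrative assumptions. It follows the rules documented above — a constant size operand and two MachineMemOperands, store side first, load side second.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Hypothetical helper, not part of this commit: emit a fixed-size inline copy.
static void emitInlineCopy(MachineIRBuilder &MIRBuilder, Register Dst,
                           Register Src, uint64_t NumBytes, Align DstAlign,
                           Align SrcAlign) {
  MachineFunction &MF = MIRBuilder.getMF();

  // The size must currently come from a constant (here assumed to be a
  // 64-bit scalar); dynamic sizes are still a TODO for llvm.memcpy.inline.
  auto Size = MIRBuilder.buildConstant(LLT::scalar(64), NumBytes);

  // One MMO for the store side and one for the load side, in that order.
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOStore, NumBytes, DstAlign);
  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo(), MachineMemOperand::MOLoad, NumBytes, SrcAlign);

  MIRBuilder.buildInstr(TargetOpcode::G_MEMCPY_INLINE)
      .addUse(Dst)
      .addUse(Src)
      .addUse(Size.getReg(0))
      .addMemOperand(StoreMMO)
      .addMemOperand(LoadMMO);
}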
17 changes: 13 additions & 4 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -532,16 +532,25 @@ class CombinerHelper {
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);

/// Emit loads and stores that perform the given memcpy.
/// Assumes \p MI is a G_MEMCPY_INLINE
/// TODO: implement dynamically sized inline memcpy,
/// and rename: s/bool tryEmit/void emit/
bool tryEmitMemcpyInline(MachineInstr &MI);

private:
// Memcpy family optimization helpers.
bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
uint64_t KnownLen, Align DstAlign, Align SrcAlign,
bool IsVolatile);
bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
unsigned KnownLen, Align DstAlign, Align SrcAlign,
bool IsVolatile);
uint64_t KnownLen, uint64_t Limit, Align DstAlign,
Align SrcAlign, bool IsVolatile);
bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
unsigned KnownLen, Align DstAlign, Align SrcAlign,
uint64_t KnownLen, Align DstAlign, Align SrcAlign,
bool IsVolatile);
bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
unsigned KnownLen, Align DstAlign, bool IsVolatile);
uint64_t KnownLen, Align DstAlign, bool IsVolatile);

/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
@@ -739,6 +739,9 @@ HANDLE_TARGET_OPCODE(G_WRITE_REGISTER)
/// llvm.memcpy intrinsic
HANDLE_TARGET_OPCODE(G_MEMCPY)

/// llvm.memcpy.inline intrinsic
HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE)

/// llvm.memmove intrinsic
HANDLE_TARGET_OPCODE(G_MEMMOVE)

8 changes: 8 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
@@ -1353,6 +1353,14 @@ def G_MEMCPY : GenericInstruction {
let mayStore = true;
}

def G_MEMCPY_INLINE : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size);
let hasSideEffects = false;
let mayLoad = true;
let mayStore = true;
}

def G_MEMMOVE : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
78 changes: 64 additions & 14 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1218,7 +1218,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
}

bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
Register Val, unsigned KnownLen,
Register Val, uint64_t KnownLen,
Align Alignment, bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1330,10 +1330,51 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
return true;
}

bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
Register Len = MI.getOperand(2).getReg();

const auto *MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
bool IsVolatile = MemOp->isVolatile();

// See if this is a constant length copy
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
// FIXME: support dynamically sized G_MEMCPY_INLINE
assert(LenVRegAndVal.hasValue() &&
"inline memcpy with dynamic size is not yet supported");
uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
MI.eraseFromParent();
return true;
}

const auto &DstMMO = **MI.memoperands_begin();
const auto &SrcMMO = **std::next(MI.memoperands_begin());
Align DstAlign = DstMMO.getBaseAlign();
Align SrcAlign = SrcMMO.getBaseAlign();

return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
IsVolatile);
}

bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
Register Src, uint64_t KnownLen,
Align DstAlign, Align SrcAlign,
bool IsVolatile) {
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
return optimizeMemcpy(MI, Dst, Src, KnownLen,
std::numeric_limits<uint64_t>::max(), DstAlign,
SrcAlign, IsVolatile);
}

bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
Register Src, unsigned KnownLen,
Align DstAlign, Align SrcAlign,
bool IsVolatile) {
Register Src, uint64_t KnownLen,
uint64_t Limit, Align DstAlign,
Align SrcAlign, bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
auto &DL = MF.getDataLayout();
@@ -1343,7 +1384,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,

bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
Align Alignment = commonAlignment(DstAlign, SrcAlign);

MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
@@ -1354,7 +1394,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
// FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
// if the memcpy is in a tail call position.

unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
std::vector<LLT> MemOps;

const auto &DstMMO = **MI.memoperands_begin();
@@ -1437,7 +1476,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
}

bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
Register Src, unsigned KnownLen,
Register Src, uint64_t KnownLen,
Align DstAlign, Align SrcAlign,
bool IsVolatile) {
auto &MF = *MI.getParent()->getParent();
@@ -1550,10 +1589,6 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {

auto MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
bool IsVolatile = MemOp->isVolatile();
// Don't try to optimize volatile.
if (IsVolatile)
return false;

Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
@@ -1571,18 +1606,33 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
if (!LenVRegAndVal)
return false; // Leave it to the legalizer to lower it to a libcall.
unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();

if (KnownLen == 0) {
MI.eraseFromParent();
return true;
}

bool IsVolatile = MemOp->isVolatile();
if (Opc == TargetOpcode::G_MEMCPY_INLINE)
return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
IsVolatile);

// Don't try to optimize volatile.
if (IsVolatile)
return false;

if (MaxLen && KnownLen > MaxLen)
return false;

if (Opc == TargetOpcode::G_MEMCPY)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
if (Opc == TargetOpcode::G_MEMCPY) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
bool OptSize = shouldLowerMemFuncForSize(MF);
uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
IsVolatile);
}
if (Opc == TargetOpcode::G_MEMMOVE)
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
if (Opc == TargetOpcode::G_MEMSET)
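In the combiner, the practical difference between G_MEMCPY and G_MEMCPY_INLINE comes down to the store-count limit passed to optimizeMemcpy. The helper below is an illustrative condensation of the two call sites above, under an assumed name; it is not part of the patch.

#include <cstdint>
#include <limits>
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

// Illustrative only: summarizes how the expansion limit is chosen.
static uint64_t pickMemcpyExpansionLimit(bool IsInlineCopy,
                                         const TargetLowering &TLI,
                                         bool OptSize) {
  // G_MEMCPY_INLINE must never be turned into a libcall, so it is expanded
  // regardless of how many loads/stores that takes.
  if (IsInlineCopy)
    return std::numeric_limits<uint64_t>::max();
  // Plain G_MEMCPY keeps the target's usual store-count threshold.
  return TLI.getMaxStoresPerMemcpy(OptSize);
}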
15 changes: 11 additions & 4 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1589,6 +1589,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
SrcAlign = MCI->getSourceAlign().valueOrOne();
} else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
SrcAlign = MCI->getSourceAlign().valueOrOne();
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
DstAlign = MMI->getDestAlign().valueOrOne();
SrcAlign = MMI->getSourceAlign().valueOrOne();
@@ -1597,10 +1600,12 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
DstAlign = MSI->getDestAlign().valueOrOne();
}

// We need to propagate the tail call flag from the IR inst as an argument.
// Otherwise, we have to pessimize and assume later that we cannot tail call
// any memory intrinsics.
ICall.addImm(CI.isTailCall() ? 1 : 0);
if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
// We need to propagate the tail call flag from the IR inst as an argument.
// Otherwise, we have to pessimize and assume later that we cannot tail call
// any memory intrinsics.
ICall.addImm(CI.isTailCall() ? 1 : 0);
}

// Create mem operands to store the alignment and volatile info.
auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
@@ -2033,6 +2038,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*CI.getArgOperand(0)),
MachineInstr::copyFlagsFromInstruction(CI));
return true;
case Intrinsic::memcpy_inline:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
case Intrinsic::memcpy:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
case Intrinsic::memmove:
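A consequence of the translator change above is that G_MEMCPY_INLINE carries no trailing tail-call immediate, while G_MEMCPY, G_MEMMOVE, and G_MEMSET still do. The sketch below restates that rule with a hypothetical helper name; it mirrors the conditional in IRTranslator::translateMemFunc rather than copying code from the patch.

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper (not in the patch): append the tail-call flag the way
// translateMemFunc now does for the memory-intrinsic opcodes.
static void maybeAddTailCallFlag(MachineInstrBuilder &MIB, unsigned Opcode,
                                 const CallInst &CI) {
  // The inline form is guaranteed never to become a call, so it has no
  // tail-call operand at all.
  if (Opcode == TargetOpcode::G_MEMCPY_INLINE)
    return;
  MIB.addImm(CI.isTailCall() ? 1 : 0);
}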
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1477,6 +1477,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
case TargetOpcode::G_MEMCPY_INLINE:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE: {
ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
@@ -1507,6 +1508,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
report("inconsistent load address space", MI);

if (Opc != TargetOpcode::G_MEMCPY_INLINE)
if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL))
report("'tail' flag (operand 3) must be an immediate 0 or 1", MI);

break;
}
case TargetOpcode::G_BZERO:
@@ -1532,6 +1537,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
report("inconsistent " + Twine(Name, " address space"), MI);

if (!MI->getOperand(MI->getNumOperands() - 1).isImm() ||
(MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL))
report("'tail' flag (last operand) must be an immediate 0 or 1", MI);

break;
}
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
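The verifier additions amount to the following invariant, restated here as a standalone predicate for illustration (this function does not exist in the patch): every G_MEMCPY, G_MEMMOVE, G_MEMSET, and G_BZERO must end in an immediate 'tail' flag of 0 or 1, while G_MEMCPY_INLINE has no such operand.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Illustrative restatement of the new checks, assuming MI is one of the
// generic memory-intrinsic opcodes listed above.
static bool hasWellFormedTailFlag(const MachineInstr &MI) {
  // The inline copy never carries a tail-call flag.
  if (MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE)
    return true;
  // For the others, the last explicit operand must be an immediate 0 or 1.
  const MachineOperand &Tail = MI.getOperand(MI.getNumOperands() - 1);
  return Tail.isImm() && (Tail.getImm() & ~1LL) == 0;
}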
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -85,6 +85,8 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET: {
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -272,6 +272,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET: {
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -205,6 +205,8 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return true;

switch (MI.getOpcode()) {
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_CONCAT_VECTORS:
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
2 changes: 2 additions & 0 deletions llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -42,6 +42,8 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
switch (MI.getOpcode()) {
default:
return false;
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD: {
