Skip to content

Commit

Permalink
[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EF…
Browse files Browse the repository at this point in the history
…FECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then has each target specify, using the new custom legalizer hook for
intrinsics, that it wants them expanded into a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516
  • Loading branch information
aemerson committed Jul 19, 2019
1 parent 3d9955c commit cf12c78
Show file tree
Hide file tree
Showing 15 changed files with 277 additions and 123 deletions.
4 changes: 2 additions & 2 deletions llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
Expand Up @@ -213,8 +213,8 @@ class IRTranslator : public MachineFunctionPass {
bool translateStore(const User &U, MachineIRBuilder &MIRBuilder);

/// Translate an LLVM string intrinsic (memcpy, memset, ...).
bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
unsigned ID);
bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
Intrinsic::ID ID);

void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);

Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
Expand Up @@ -236,6 +236,11 @@ createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result,
ArrayRef<CallLowering::ArgInfo> Args);

/// Create a libcall to memcpy et al.
///
/// \p MI must be a G_INTRINSIC_W_SIDE_EFFECTS instruction. The memcpy, memset
/// and memmove intrinsics are lowered to a call to the matching runtime
/// library routine, with the instruction's remaining operands passed as the
/// call arguments.
///
/// \returns LegalizerHelper::Legalized once the call has been emitted, or
/// LegalizerHelper::UnableToLegalize for any other intrinsic or when call
/// lowering fails. \p MI itself is not erased; that is left to the caller.
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI,
MachineInstr &MI);

} // End namespace llvm.

#endif
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/MachineInstr.h
Expand Up @@ -1607,6 +1607,12 @@ class MachineInstr
/// to a register def in this instruction and point them to \p Reg instead.
void changeDebugValuesDefReg(unsigned Reg);

/// Returns the Intrinsic::ID for this instruction.
///
/// The ID is read from the operand located immediately after the explicit
/// register definitions.
/// \pre Must have an intrinsic ID operand.
unsigned getIntrinsicID() const {
  const unsigned IDOperandIdx = getNumExplicitDefs();
  return getOperand(IDOperandIdx).getIntrinsicID();
}

private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
Expand Down
76 changes: 34 additions & 42 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Expand Up @@ -1119,54 +1119,46 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return true;
}

bool IRTranslator::translateMemfunc(const CallInst &CI,
bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
unsigned ID) {
Intrinsic::ID ID) {

// If the source is undef, then just emit a nop.
if (isa<UndefValue>(CI.getArgOperand(1))) {
switch (ID) {
case Intrinsic::memmove:
case Intrinsic::memcpy:
case Intrinsic::memset:
return true;
default:
break;
}
}

LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
Type *DstTy = CI.getArgOperand(0)->getType();
if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
return false;
if (isa<UndefValue>(CI.getArgOperand(1)))
return true;

SmallVector<CallLowering::ArgInfo, 8> Args;
for (int i = 0; i < 3; ++i) {
const auto &Arg = CI.getArgOperand(i);
Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
ArrayRef<Register> Res;
auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true);
for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
ICall.addUse(getOrCreateVReg(**AI));

unsigned DstAlign = 0, SrcAlign = 0;
unsigned IsVol =
cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
->getZExtValue();

if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1);
SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1);
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1);
SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1);
} else {
auto *MSI = cast<MemSetInst>(&CI);
DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1);
}

const char *Callee;
switch (ID) {
case Intrinsic::memmove:
case Intrinsic::memcpy: {
Type *SrcTy = CI.getArgOperand(1)->getType();
if(cast<PointerType>(SrcTy)->getAddressSpace() != 0)
return false;
Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
break;
}
case Intrinsic::memset:
Callee = "memset";
break;
default:
return false;
}
// Create mem operands to store the alignment and volatile info.
auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(0)),
MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
if (ID != Intrinsic::memset)
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(1)),
MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));

return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
MachineOperand::CreateES(Callee),
CallLowering::ArgInfo({0}, CI.getType()), Args);
return true;
}

void IRTranslator::getStackGuard(Register DstReg,
Expand Down Expand Up @@ -1433,7 +1425,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
return translateMemfunc(CI, MIRBuilder, ID);
return translateMemFunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
Register Reg = getOrCreateVReg(CI);
Expand Down
49 changes: 49 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Expand Up @@ -317,6 +317,55 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Args);
}

/// Lower a memcpy/memset/memmove G_INTRINSIC_W_SIDE_EFFECTS instruction into a
/// call to the corresponding runtime library routine.
///
/// The call is built at \p MI's position. \p MI itself is left in place, so
/// the caller is responsible for erasing it after a successful lowering.
///
/// \returns LegalizerHelper::Legalized when the call was emitted, and
/// LegalizerHelper::UnableToLegalize when the intrinsic is not one of the
/// three memory functions, when the target has no name for the libcall, or
/// when call lowering fails.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  // Select the libcall before touching the remaining operands, so that an
  // unsupported intrinsic is rejected without assuming that all of its
  // operands are registers.
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();

  // A target may not provide this libcall; bail out rather than creating an
  // external symbol operand from a null name.
  const char *Name = TLI.getLibcallName(RTLibcall);
  if (!Name)
    return LegalizerHelper::UnableToLegalize;

  // Operand 0 is the intrinsic ID; everything after it is a call argument.
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++) {
    Register Reg = MI.getOperand(i).getReg();

    // We need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  MIRBuilder.setInstr(MI);
  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(RTLibcall),
                     MachineOperand::CreateES(Name),
                     CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)), Args))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
Type *FromType) {
auto ToMVT = MVT::getVT(ToType);
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
Expand Up @@ -13,7 +13,9 @@

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
Expand Down Expand Up @@ -617,6 +619,24 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
llvm_unreachable("expected switch to return");
}

/// Custom legalization hook for intrinsic instructions.
///
/// memcpy, memset and memmove are expanded into libcalls; every other
/// intrinsic is accepted unchanged. Returns false only when the libcall
/// expansion fails.
bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  const unsigned IntrinID = MI.getIntrinsicID();
  const bool IsMemIntrinsic = IntrinID == Intrinsic::memcpy ||
                              IntrinID == Intrinsic::memset ||
                              IntrinID == Intrinsic::memmove;
  // Nothing to do for intrinsics other than the memory functions.
  if (!IsMemIntrinsic)
    return true;

  const auto Result = createMemLibcall(MIRBuilder, MRI, MI);
  if (Result == LegalizerHelper::UnableToLegalize)
    return false;

  // The emitted call replaces the intrinsic, so remove the original.
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
Expand Up @@ -31,6 +31,9 @@ class AArch64LegalizerInfo : public LegalizerInfo {
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const override;

/// Custom legalization hook for intrinsic instructions. The memcpy, memset
/// and memmove intrinsics are expanded into libcalls; other intrinsics are
/// left untouched. Returns false if the expansion could not be performed.
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;

private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
Expand Up @@ -153,3 +153,20 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,

return false;
}

/// Custom legalization hook for intrinsic instructions.
///
/// memcpy, memset and memmove are turned into libcalls and the original
/// instruction is erased; all other intrinsics are reported as handled
/// without modification. Returns false only if the libcall could not be
/// created.
bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  default:
    // Not a memory intrinsic: nothing to expand.
    return true;
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    break;
  }

  const bool Lowered = createMemLibcall(MIRBuilder, MRI, MI) !=
                       LegalizerHelper::UnableToLegalize;
  // Only drop the intrinsic once the replacement call exists.
  if (Lowered)
    MI.eraseFromParent();
  return Lowered;
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/Mips/MipsLegalizerInfo.h
Expand Up @@ -28,6 +28,9 @@ class MipsLegalizerInfo : public LegalizerInfo {
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const override;

/// Custom legalization hook for intrinsic instructions. The memcpy, memset
/// and memmove intrinsics are expanded into libcalls; other intrinsics are
/// left untouched. Returns false if the expansion could not be performed.
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;
};
} // end namespace llvm
#endif
19 changes: 19 additions & 0 deletions llvm/lib/Target/X86/X86LegalizerInfo.cpp
Expand Up @@ -13,6 +13,7 @@
#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -84,6 +85,24 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
verify(*STI.getInstrInfo());
}

/// Custom legalization hook for intrinsic instructions.
///
/// Expands memcpy, memset and memmove into libcalls and erases the intrinsic
/// afterwards. Any other intrinsic is accepted as-is. Returns false only when
/// the libcall expansion fails.
bool X86LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  const unsigned IID = MI.getIntrinsicID();
  // Intrinsics other than the three memory functions need no work here.
  if (IID != Intrinsic::memcpy && IID != Intrinsic::memset &&
      IID != Intrinsic::memmove)
    return true;

  if (createMemLibcall(MIRBuilder, MRI, MI) ==
      LegalizerHelper::UnableToLegalize)
    return false;

  // The call has been emitted in place of the intrinsic.
  MI.eraseFromParent();
  return true;
}

void X86LegalizerInfo::setLegalizerInfo32bit() {

const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86LegalizerInfo.h
Expand Up @@ -32,6 +32,9 @@ class X86LegalizerInfo : public LegalizerInfo {
public:
X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);

/// Custom legalization hook for intrinsic instructions. The memcpy, memset
/// and memmove intrinsics are expanded into libcalls; other intrinsics are
/// left untouched. Returns false if the expansion could not be performed.
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;

private:
void setLegalizerInfo32bit();
void setLegalizerInfo64bit();
Expand Down
27 changes: 14 additions & 13 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
Expand Up @@ -1130,24 +1130,29 @@ define void @test_memcpy(i8* %dst, i8* %src, i64 %size) {
; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: $x0 = COPY [[DST]]
; CHECK: $x1 = COPY [[SRC]]
; CHECK: $x2 = COPY [[SIZE]]
; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[DST]](p0), [[SRC]](p0), [[SIZE]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0)
ret void
}

declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)*, i8 addrspace(1)*, i64, i1)
define void @test_memcpy_nonzero_as(i8 addrspace(1)* %dst, i8 addrspace(1) * %src, i64 %size) {
; CHECK-LABEL: name: test_memcpy_nonzero_as
; CHECK: [[DST:%[0-9]+]]:_(p1) = COPY $x0
; CHECK: [[SRC:%[0-9]+]]:_(p1) = COPY $x1
; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[DST]](p1), [[SRC]](p1), [[SIZE]](s64) :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 1)
call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %size, i1 0)
ret void
}

declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1)
define void @test_memmove(i8* %dst, i8* %src, i64 %size) {
; CHECK-LABEL: name: test_memmove
; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: $x0 = COPY [[DST]]
; CHECK: $x1 = COPY [[SRC]]
; CHECK: $x2 = COPY [[SIZE]]
; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memmove), [[DST]](p0), [[SRC]](p0), [[SIZE]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src)
call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0)
ret void
}
Expand All @@ -1159,11 +1164,7 @@ define void @test_memset(i8* %dst, i8 %val, i64 %size) {
; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]]
; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: $x0 = COPY [[DST]]
; CHECK: [[SRC_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[SRC]]
; CHECK: $w1 = COPY [[SRC_TMP]]
; CHECK: $x2 = COPY [[SIZE]]
; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $w1, implicit $x2
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memset), [[DST]](p0), [[SRC]](s8), [[SIZE]](s64) :: (store 1 into %ir.dst)
call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0)
ret void
}
Expand Down

0 comments on commit cf12c78

Please sign in to comment.