Skip to content

Commit 361464c

Browse files
committed
[MemCpyOpt] Use memcpy source directly if dest is known to be immutable from attributes
Differential Revision: https://reviews.llvm.org/D150970
1 parent f0d97c3 commit 361464c

File tree

3 files changed

+107
-27
lines changed

3 files changed

+107
-27
lines changed

llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
7373
bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
7474
BatchAAResults &BAA);
7575
bool processByValArgument(CallBase &CB, unsigned ArgNo);
76+
bool processImmutArgument(CallBase &CB, unsigned ArgNo);
7677
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
7778
Value *ByteVal);
7879
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1604,6 +1604,101 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
16041604
return true;
16051605
}
16061606

1607+
/// This is called on memcpy dest pointer arguments attributed as immutable
1608+
/// during call. Try to use memcpy source directly if all of the following
1609+
/// conditions are satisfied.
1610+
/// 1. The memcpy dst is neither modified during the call nor captured by the
1611+
/// call. (i.e. the call-site has readonly, noalias and nocapture attributes.)
1612+
/// 2. The memcpy dst is an alloca with known alignment & size.
1613+
/// 2-1. The memcpy length == the alloca size which ensures that the new
1614+
/// pointer is dereferenceable for the required range
1615+
/// 2-2. The src pointer has alignment >= the alloca alignment or can be
1616+
/// enforced so.
1617+
/// 3. The memcpy dst and src are not modified between the memcpy and the call.
1618+
/// (if MSSA clobber check is safe.)
1619+
/// 4. The memcpy src is not modified during the call. (ModRef check shows no
1620+
/// Mod.)
1621+
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
1622+
// 1. Ensure passed argument is immutable during call.
1623+
if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
1624+
CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
1625+
return false;
1626+
const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1627+
Value *ImmutArg = CB.getArgOperand(ArgNo);
1628+
1629+
// 2. Check that arg is alloca
1630+
// TODO: Even if the arg gets back to branches, we can remove memcpy if all
1631+
// the alloca alignments can be enforced to source alignment.
1632+
auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
1633+
if (!AI)
1634+
return false;
1635+
1636+
std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
1637+
// Can't handle unknown size alloca.
1638+
// (e.g. Variable Length Array, Scalable Vector)
1639+
if (!AllocaSize || AllocaSize->isScalable())
1640+
return false;
1641+
MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
1642+
MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1643+
if (!CallAccess)
1644+
return false;
1645+
1646+
MemCpyInst *MDep = nullptr;
1647+
BatchAAResults BAA(*AA);
1648+
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1649+
CallAccess->getDefiningAccess(), Loc, BAA);
1650+
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1651+
MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1652+
1653+
// If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
1654+
// a memcpy, check that the arg equals the memcpy dest.
1655+
if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
1656+
return false;
1657+
1658+
// The address space of the memcpy source must match that of the immut argument.
1659+
if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1660+
ImmutArg->getType()->getPointerAddressSpace())
1661+
return false;
1662+
1663+
// 2-1. The length of the memcpy must be equal to the size of the alloca.
1664+
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1665+
if (!MDepLen || AllocaSize != MDepLen->getValue())
1666+
return false;
1667+
1668+
// 2-2. The memcpy source align must be larger than or equal to the alloca's
1669+
// align. If not so, we check to see if we can force the source of the memcpy
1670+
// to the alignment we need. If we fail, we bail out.
1671+
Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
1672+
Align AllocaAlign = AI->getAlign();
1673+
if (MemDepAlign < AllocaAlign &&
1674+
getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
1675+
DT) < AllocaAlign)
1676+
return false;
1677+
1678+
// 3. Verify that the source doesn't change in between the memcpy and
1679+
// the call.
1680+
// memcpy(a <- b)
1681+
// *b = 42;
1682+
// foo(*a)
1683+
// It would be invalid to transform the call foo(*a) into foo(*b).
1684+
if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1685+
MSSA->getMemoryAccess(MDep), CallAccess))
1686+
return false;
1687+
1688+
// 4. The memcpy src must not be modified during the call.
1689+
if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
1690+
return false;
1691+
1692+
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
1693+
<< " " << *MDep << "\n"
1694+
<< " " << CB << "\n");
1695+
1696+
// Otherwise we're good! Update the immut argument.
1697+
CB.setArgOperand(ArgNo, MDep->getSource());
1698+
++NumMemCpyInstr;
1699+
return true;
1700+
}
1701+
16071702
/// Executes one iteration of MemCpyOptPass.
16081703
bool MemCpyOptPass::iterateOnFunction(Function &F) {
16091704
bool MadeChange = false;
@@ -1632,9 +1727,12 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
16321727
else if (auto *M = dyn_cast<MemMoveInst>(I))
16331728
RepeatInstruction = processMemMove(M);
16341729
else if (auto *CB = dyn_cast<CallBase>(I)) {
1635-
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1730+
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
16361731
if (CB->isByValArgument(i))
16371732
MadeChange |= processByValArgument(*CB, i);
1733+
else if (CB->onlyReadsMemory(i))
1734+
MadeChange |= processImmutArgument(*CB, i);
1735+
}
16381736
}
16391737

16401738
// Reprocess the instruction if desired.

llvm/test/Transforms/MemCpyOpt/memcpy.ll

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -395,13 +395,9 @@ declare void @f2(ptr)
395395
declare void @f(ptr)
396396
declare void @f_full_readonly(ptr nocapture noalias readonly)
397397

398-
; TODO: Remove memcpy, which is guaranteed to be invariant
399-
; before and after the call because of its attributes.
400398
define void @immut_param(ptr align 4 noalias %val) {
401399
; CHECK-LABEL: @immut_param(
402-
; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
403-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
404-
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL1]])
400+
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL:%.*]])
405401
; CHECK-NEXT: ret void
406402
;
407403
%val1 = alloca i8, align 4
@@ -452,12 +448,9 @@ define void @immut_param_maywrite(ptr align 4 noalias %val) {
452448
ret void
453449
}
454450

455-
; TODO: Remove memcpy
456451
define void @immut_param_readonly(ptr align 4 noalias %val) {
457452
; CHECK-LABEL: @immut_param_readonly(
458-
; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
459-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
460-
; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL1]])
453+
; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL:%.*]])
461454
; CHECK-NEXT: ret void
462455
;
463456
%val1 = alloca i8, align 4
@@ -466,12 +459,9 @@ define void @immut_param_readonly(ptr align 4 noalias %val) {
466459
ret void
467460
}
468461

469-
; TODO: Remove memcpy
470462
define void @immut_param_no_align(ptr align 4 noalias %val) {
471463
; CHECK-LABEL: @immut_param_no_align(
472-
; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
473-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
474-
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]])
464+
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]])
475465
; CHECK-NEXT: ret void
476466
;
477467
%val1 = alloca i8, align 4
@@ -580,12 +570,9 @@ define void @immut_param_different_addrespace(ptr addrspace(1) align 4 noalias %
580570
ret void
581571
}
582572

583-
; TODO: remove memcpy
584573
define void @immut_param_bigger_align(ptr align 16 noalias %val) {
585574
; CHECK-LABEL: @immut_param_bigger_align(
586-
; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
587-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr [[VAL:%.*]], i64 1, i1 false)
588-
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]])
575+
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]])
589576
; CHECK-NEXT: ret void
590577
;
591578
%val1 = alloca i8, align 4
@@ -608,14 +595,11 @@ define void @immut_param_smaller_align(ptr align 4 noalias %val) {
608595
ret void
609596
}
610597

611-
; TODO: remove memcpy.
612598
define void @immut_param_enforced_alignment() {
613599
; CHECK-LABEL: @immut_param_enforced_alignment(
614-
; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 1
600+
; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4
615601
; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4
616-
; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
617-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[VAL1]], ptr [[VAL]], i64 1, i1 false)
618-
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]])
602+
; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL]])
619603
; CHECK-NEXT: ret void
620604
;
621605
%val = alloca i8, align 1
@@ -659,14 +643,11 @@ define void @immut_but_alias_src(ptr %val) {
659643
ret void
660644
}
661645

662-
; TODO: remove memcpy
663646
define void @immut_unescaped_alloca() {
664647
; CHECK-LABEL: @immut_unescaped_alloca(
665648
; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4
666649
; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4
667-
; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
668-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL]], i64 1, i1 false)
669-
; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL1]])
650+
; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL]])
670651
; CHECK-NEXT: ret void
671652
;
672653
%val = alloca i8, align 4

0 commit comments

Comments
 (0)