diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index d3e5e2591eea1..9ce64623e25b2 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -20,7 +20,6 @@ namespace llvm {
 class AAResults;
-class AllocaInst;
 class BatchAAResults;
 class AssumptionCache;
 class CallBase;
@@ -78,9 +77,6 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
   Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                     Value *ByteVal);
   bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
-  bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
-                             AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
-                             uint64_t Size, BatchAAResults &BAA);
   void eraseInstruction(Instruction *I);
   bool iterateOnFunction(Function &F);
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index acc3ea36096d8..00937e0d734ab 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -69,7 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
-STATISTIC(NumStackMove, "Number of stack-move optimizations performed");

 namespace {

@@ -731,23 +730,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
     return true;
   }

-  // If this is a load-store pair from a stack slot to a stack slot, we
-  // might be able to perform the stack-move optimization just as we do for
-  // memcpys from an alloca to an alloca.
-  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
-    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
-      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
-                                DL.getTypeStoreSize(T), BAA)) {
-        // Avoid invalidating the iterator.
-        BBI = SI->getNextNonDebugInstruction()->getIterator();
-        eraseInstruction(SI);
-        eraseInstruction(LI);
-        ++NumMemCpyInstr;
-        return true;
-      }
-    }
-  }
-
   return false;
 }

@@ -1426,210 +1408,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   return true;
 }

-// Attempts to optimize the pattern whereby memory is copied from an alloca to
-// another alloca, where the two allocas don't have conflicting mod/ref. If
-// successful, the two allocas can be merged into one and the transfer can be
-// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
-// move operations in that language.
-//
-// Once we determine that the optimization is safe to perform, we replace all
-// uses of the destination alloca with the source alloca. We also "shrink wrap"
-// the lifetime markers of the single merged alloca to before the first use
-// and after the last use. Note that the "shrink wrapping" procedure is a safe
-// transformation only because we restrict the scope of this optimization to
-// allocas that aren't captured.
-bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
-                                          AllocaInst *DestAlloca,
-                                          AllocaInst *SrcAlloca, uint64_t Size,
-                                          BatchAAResults &BAA) {
-  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
-                    << *Store << "\n");
-
-  // Make sure the two allocas are in the same address space.
-  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
-    return false;
-  }
-
-  // 1. Check that copy is full. Calculate the static size of the allocas to be
-  // merged, bail out if we can't.
-  const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
-  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
-  if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
-    return false;
-  }
-  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
-  if (!DestSize || DestSize->isScalable() ||
-      Size != DestSize->getFixedValue()) {
-    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
-    return false;
-  }
-
-  // 2-1. Check that src and dest are static allocas, which are not affected by
-  // stacksave/stackrestore.
-  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
-    return false;
-
-  // 2-2. Check that src and dest are never captured, unescaped allocas. Also
-  // collect lifetime markers first/last users in order to shrink wrap the
-  // lifetimes, and instructions with noalias metadata to remove them.
-
-  SmallVector<Instruction *, 4> LifetimeMarkers;
-  Instruction *FirstUser = nullptr, *LastUser = nullptr;
-  SmallSet<Instruction *, 4> NoAliasInstrs;
-
-  // Recursively track the user and check whether modified alias exist.
-  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
-    bool CanBeNull, CanBeFreed;
-    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
-  };
-
-  auto CaptureTrackingWithModRef =
-      [&](Instruction *AI,
-          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
-    SmallVector<Instruction *, 8> Worklist;
-    Worklist.push_back(AI);
-    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
-    Worklist.reserve(MaxUsesToExplore);
-    SmallSet<const Use *, 20> Visited;
-    while (!Worklist.empty()) {
-      Instruction *I = Worklist.back();
-      Worklist.pop_back();
-      for (const Use &U : I->uses()) {
-        if (Visited.size() >= MaxUsesToExplore) {
-          LLVM_DEBUG(
-              dbgs()
-              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
-          return false;
-        }
-        if (!Visited.insert(&U).second)
-          continue;
-        switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
-        case UseCaptureKind::MAY_CAPTURE:
-          return false;
-        case UseCaptureKind::PASSTHROUGH:
-          // Instructions cannot have non-instruction users.
-          Worklist.push_back(cast<Instruction>(U.getUser()));
-          continue;
-        case UseCaptureKind::NO_CAPTURE: {
-          auto *UI = cast<Instruction>(U.getUser());
-          if (DestAlloca->getParent() != UI->getParent())
-            return false;
-          if (!FirstUser || UI->comesBefore(FirstUser))
-            FirstUser = UI;
-          if (!LastUser || LastUser->comesBefore(UI))
-            LastUser = UI;
-          if (UI->hasMetadata(LLVMContext::MD_noalias))
-            NoAliasInstrs.insert(UI);
-          if (UI->isLifetimeStartOrEnd()) {
-            // We note the locations of these intrinsic calls so that we can
-            // delete them later if the optimization succeeds, this is safe
-            // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
-            // conceptually fill all the bytes of the alloca with an undefined
-            // value.
-            int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
-            if (Size < 0 || Size == DestSize) {
-              LifetimeMarkers.push_back(UI);
-              continue;
-            }
-          }
-          if (!ModRefCallback(UI))
-            return false;
-        }
-        }
-      }
-    }
-    return true;
-  };
-
-  // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
-  // from the alloca to the Store.
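
A sketch (not part of this patch) of the interference this check rejects, written as an IR fragment using the %struct.Foo helpers from the stack-move.ll test below; compare the @mod_dest_before_copy test there:

  %src = alloca %struct.Foo, align 4
  %dest = alloca %struct.Foo, align 4
  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
  store i32 13, ptr %dest        ; dest is modified before the copy
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)

In the original program the copy overwrites the store of 13; if the allocas were merged and the copy removed, that store would stay visible through %dest, so the removed code bailed out whenever the destination had any mod/ref between the allocas and the transfer.
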
-  ModRefInfo DestModRef = ModRefInfo::NoModRef;
-  MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
-  auto DestModRefCallback = [&](Instruction *UI) -> bool {
-    // We don't care about the store itself.
-    if (UI == Store)
-      return true;
-    ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
-    // FIXME: For multi-BB cases, we need to see reachability from it to
-    // store.
-    // Bailout if Dest may have any ModRef before Store.
-    if (UI->comesBefore(Store) && isModOrRefSet(Res))
-      return false;
-    DestModRef |= BAA.getModRefInfo(UI, DestLoc);
-
-    return true;
-  };
-
-  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
-    return false;
-
-  // 3. Check that, from after the Load to the end of the BB,
-  // 3-1. if the dest has any Mod, src has no Ref, and
-  // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
-  MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
-
-  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
-    // Any ModRef before Load doesn't matter, also Load and Store can be
-    // ignored.
-    if (UI->comesBefore(Load) || UI == Load || UI == Store)
-      return true;
-    ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
-    if ((isModSet(DestModRef) && isRefSet(Res)) ||
-        (isRefSet(DestModRef) && isModSet(Res)))
-      return false;
-
-    return true;
-  };
-
-  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
-    return false;
-
-  // We can do the transformation. First, align the allocas appropriately.
-  SrcAlloca->setAlignment(
-      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
-
-  // Merge the two allocas.
-  DestAlloca->replaceAllUsesWith(SrcAlloca);
-  eraseInstruction(DestAlloca);
-
-  // Drop metadata on the source alloca.
-  SrcAlloca->dropUnknownNonDebugMetadata();
-
-  // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists.
-  if (!LifetimeMarkers.empty()) {
-    LLVMContext &C = SrcAlloca->getContext();
-    IRBuilder<> Builder(C);
-
-    ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
-    // Create a new lifetime start marker before the first user of src or alloca
-    // users.
-    Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
-    Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
-
-    // Create a new lifetime end marker after the last user of src or alloca
-    // users.
-    Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
-    Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
-
-    // Remove all other lifetime markers.
-    for (Instruction *I : LifetimeMarkers)
-      eraseInstruction(I);
-  }
-
-  // As this transformation can cause memory accesses that didn't previously
-  // alias to begin to alias one another, we remove !noalias metadata from any
-  // uses of either alloca. This is conservative, but more precision doesn't
-  // seem worthwhile right now.
-  for (Instruction *I : NoAliasInstrs)
-    I->setMetadata(LLVMContext::MD_noalias, nullptr);
-
-  LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
-  NumStackMove++;
-  return true;
-}
-
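
For reference, the pattern that the removed performStackMoveOptzn targeted is a full-size copy between two non-captured static allocas in the same basic block, roughly as in this illustrative sketch (not taken from the patch; the stack-move.ll tests below use the same shape, and %struct.Foo, @use_nocapture and the memcpy intrinsic are assumed to be defined as in that file):

  %struct.Foo = type { i32, i32, i32 }
  declare i32 @use_nocapture(ptr noundef)
  declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

  define void @example() {
    %src = alloca %struct.Foo, align 4
    %dest = alloca %struct.Foo, align 4
    store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
    call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
    %use = call i32 @use_nocapture(ptr nocapture %dest)
    ret void
  }

The optimization replaced all uses of %dest with %src and deleted the copy; with this revert, the updated CHECK lines in stack-move.ll below expect both allocas and the copy (or load/store pair) to remain.
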
 /// Perform simplification of memcpy's.  If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1686,14 +1464,13 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
       AnyClobber, MemoryLocation::getForSource(M), BAA);

-  // There are five possible optimizations we can do for memcpy:
+  // There are four possible optimizations we can do for memcpy:
   // a) memcpy-memcpy xform which exposes redundance for DSE.
   // b) call-memcpy xform for return slot optimization.
   // c) memcpy from freshly alloca'd space or space that has just started
   //    its lifetime copies undefined data, and we can therefore eliminate
   //    the memcpy in favor of the data that was already at the destination.
   // d) memcpy from a just-memset'd source can be turned into memset.
-  // e) elimination of memcpy via stack-move optimization.
   if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
     if (Instruction *MI = MD->getMemoryInst()) {
       if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1712,8 +1489,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
         }
       }
       if (auto *MDep = dyn_cast<MemCpyInst>(MI))
-        if (processMemCpyMemCpyDependence(M, MDep, BAA))
-          return true;
+        return processMemCpyMemCpyDependence(M, MDep, BAA);
       if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
         if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
           LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1732,27 +1508,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
     }
   }

-  // If the transfer is from a stack slot to a stack slot, then we may be able
-  // to perform the stack-move optimization. See the comments in
-  // performStackMoveOptzn() for more details.
-  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
-  if (!DestAlloca)
-    return false;
-  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
-  if (!SrcAlloca)
-    return false;
-  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
-  if (Len == nullptr)
-    return false;
-  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
-                            BAA)) {
-    // Avoid invalidating the iterator.
-    BBI = M->getNextNonDebugInstruction()->getIterator();
-    eraseInstruction(M);
-    ++NumMemCpyInstr;
-    return true;
-  }
-
   return false;
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
index 8c769319236d6..3a6b3c3804b57 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -56,9 +56,11 @@ define void @write_dest_between_call_and_memcpy() {

 define void @write_src_between_call_and_memcpy() {
 ; CHECK-LABEL: @write_src_between_call_and_memcpy(
+; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
 ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT: store i8 1, ptr [[SRC]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 16, i1 false)
 ; CHECK-NEXT: ret void
 ;
   %dest = alloca [16 x i8]
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 5dedf0c1e413b..3157e4c72275a 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -20,14 +20,19 @@ declare i32 @use_maycapture(ptr noundef)
 declare i32 @use_readonly(ptr readonly)
 declare i32 @use_writeonly(ptr noundef) memory(write)

+; TODO: Merge alloca and remove memcpy.
define void @basic_memcpy() { ; CHECK-LABEL: define void @basic_memcpy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -46,14 +51,19 @@ define void @basic_memcpy() { ret void } +; TODO: Merge alloca and remove memmove. define void @basic_memmove() { ; CHECK-LABEL: define void @basic_memmove() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -72,15 +82,21 @@ define void @basic_memmove() { ret void } +; TODO: Merge alloca and remove load/store. ; Tests that the optimization succeeds with a load/store pair. 
define void @load_store() { ; CHECK-LABEL: define void @load_store() { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]]) +; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -99,15 +115,20 @@ define void @load_store() { ret void } +; TODO: Merge alloca. ; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid. define void @align_up() { ; CHECK-LABEL: define void @align_up() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -125,17 +146,26 @@ define void @align_up() { ret void } +; TODO: Merge alloca and remove memcpy, shrinkwrap lifetimes. ; Tests that we correctly remove extra lifetime intrinsics when performing the ; optimization. 
define void @remove_extra_lifetime_intrinsics() { ; CHECK-LABEL: define void @remove_extra_lifetime_intrinsics() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -158,14 +188,17 @@ define void @remove_extra_lifetime_intrinsics() { ret void } +; TODO: Merge alloca and remove memcpy, without inserting lifetime markers. ; Tests that we won't insert lifetime markers if they don't exist originally. define void @no_lifetime() { ; CHECK-LABEL: define void @no_lifetime() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -180,17 +213,23 @@ define void @no_lifetime() { ret void } + +; TODO: Merge alloca and remove memcpy. ; Tests that aliasing src or dest but no modification desn't prevent transformations. 
define void @alias_no_mod() { ; CHECK-LABEL: define void @alias_no_mod() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) -; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[DEST]], i32 0, i32 0 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[SRC_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -219,15 +258,20 @@ define void @alias_no_mod() { !3 = !{!"Whatever"} +; TODO: Merge alloca and remove memcpy, remove noalias metadata on src. ; Tests that we remove scoped noalias metadata from a call. define void @remove_scoped_noalias() { ; CHECK-LABEL: define void @remove_scoped_noalias() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]), !noalias [[META0]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -245,15 +289,20 @@ define void @remove_scoped_noalias() { ret void } +; TODO: Merge alloca and remove memcpy, remove noalias metadata on src. ; Tests that we remove metadata on the merged alloca. 
define void @remove_alloca_metadata() { ; CHECK-LABEL: define void @remove_alloca_metadata() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4, !annotation [[META3:![0-9]+]] +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope [[META0]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]), !noalias [[META0]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4, !annotation !3 @@ -271,15 +320,20 @@ define void @remove_alloca_metadata() { ret void } +; TODO: Merge alloca and remove memcpy. ; Tests that we can merge alloca if the dest and src has only refs except lifetime intrinsics. define void @src_ref_dest_ref_after_copy() { ; CHECK-LABEL: define void @src_ref_dest_ref_after_copy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -301,11 +355,15 @@ define void @src_ref_dest_ref_after_copy() { define void @src_mod_dest_mod_after_copy() { ; CHECK-LABEL: define void @src_mod_dest_mod_after_copy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture 
[[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -657,6 +715,7 @@ define void @src_captured() { ret void } +; TODO: Prevent this transformation ; Tests that failure if any modref exists before the copy, ; Exactly ref seems safe because no mod say ref would be always undefined, but to make simple and conservative. define void @mod_ref_before_copy() { @@ -690,6 +749,7 @@ define void @mod_ref_before_copy() { ret void } +; TODO: Prevent this transformation ; Tests that failure because copy semantics will change if dest is replaced with src. define void @mod_dest_before_copy() { ; CHECK-LABEL: define void @mod_dest_before_copy() { @@ -722,6 +782,7 @@ define void @mod_dest_before_copy() { ret void } +; TODO: Prevent transformations define void @mod_src_before_store_after_load() { ; CHECK-LABEL: define void @mod_src_before_store_after_load() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4