diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9504abe7aa91c4..d8a2f55957b19b 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -311,6 +311,22 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) +// Returns true if V may be observable by the caller through unwinding, i.e. V +// may be caller-accessible and an instruction in [Start, End) may throw. +static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, + Instruction *End) { + assert(Start->getParent() == End->getParent() && "Must be in same block"); + if (!Start->getFunction()->doesNotThrow() && + !isa(getUnderlyingObject(V))) { + for (const Instruction &I : + make_range(Start->getIterator(), End->getIterator())) { + if (I.mayThrow()) + return true; + } + } + return false; +} + void MemCpyOptPass::eraseInstruction(Instruction *I) { if (MSSAU) MSSAU->removeMemoryAccess(I); @@ -848,16 +864,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // guaranteed to be executed if C is. As it is a non-atomic access, it // renders accesses from other threads undefined. // TODO: This is currently not checked. - // TODO: Check underlying object, so we can look through GEPs. - if (!isa(cpyDest)) { - assert(C->getParent() == cpyStore->getParent() && - "call and copy must be in the same block"); - for (const Instruction &I : make_range(C->getIterator(), - cpyStore->getIterator())) { - if (I.mayThrow()) - return false; - } - } + if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) + return false; // Check that dest points to memory that is at least as aligned as src. 
Align srcAlign = srcAlloca->getAlign(); @@ -1094,16 +1102,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Value *DestSize = MemSet->getLength(); Value *SrcSize = MemCpy->getLength(); - // If the destination might be accessible by the caller, make sure we cannot - // unwind between the memset and the memcpy. - if (!MemCpy->getFunction()->doesNotThrow() && - !isa(getUnderlyingObject(Dest))) { - for (const Instruction &I : - make_range(MemSet->getIterator(), MemCpy->getIterator())) { - if (I.mayThrow()) - return false; - } - } + if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy)) + return false; // By default, create an unaligned memset. unsigned Align = 1; diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index 67bdad8fa535fe..37a3cdea88e512 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -131,8 +131,9 @@ define void @dest_is_gep_may_throw_call() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 -; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]* +; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8]