diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c66b39fc7927b8..2d87f26ae39fb5 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3727,12 +3727,13 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { /// beginning of DestBlock, which can only happen if it's safe to move the /// instruction past all of the instructions between it and the end of its /// block. -static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { +static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock, + TargetLibraryInfo &TLI) { assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!"); BasicBlock *SrcBlock = I->getParent(); // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() || + if (isa(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || I->isTerminator()) return false; @@ -3752,6 +3753,51 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { if (CI->isConvergent()) return false; } + + // Unless we can prove that the memory write isn't visibile except on the + // path we're sinking to, we must bail. + if (I->mayWriteToMemory()) { + // Check for case where the call writes to an otherwise dead alloca. This + // shows up for unused out-params in idiomatic C/C++ code. + auto *CB = cast(I); + if (!CB) + // TODO: handle e.g. store to alloca here - only worth doing if we extend + // to allow reload along used path as described below. Otherwise, this + // is simply a store to a dead allocation which will be removed. + return false; + Optional Dest = MemoryLocation::getForDest(CB, TLI); + if (!Dest) + return false; + auto *AI = dyn_cast(getUnderlyingObject(Dest->Ptr)); + if (!AI) + // TODO: allow malloc? + return false; + // TODO: allow memory access dominated by move point? Note that since AI + // could have a reference to itself captured by the call, we would need to + // account for cycles in doing so. + SmallVector AllocaUsers; + SmallPtrSet Visited; + auto pushUsers = [&](const Instruction &I) { + for (const User *U : I.users()) { + if (Visited.insert(U).second) + AllocaUsers.push_back(U); + } + }; + pushUsers(*AI); + while (!AllocaUsers.empty()) { + auto *UserI = cast(AllocaUsers.pop_back_val()); + if (isa(UserI) || isa(UserI) || + isa(UserI)) { + pushUsers(*UserI); + continue; + } + if (UserI == CB) + continue; + // TODO: support lifetime.start/end here + return false; + } + } + // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { @@ -3760,7 +3806,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { // successor block. if (DestBlock->getUniquePredecessor() != I->getParent()) return false; - for (BasicBlock::iterator Scan = I->getIterator(), + for (BasicBlock::iterator Scan = std::next(I->getIterator()), E = I->getParent()->end(); Scan != E; ++Scan) if (Scan->mayWriteToMemory()) @@ -3936,7 +3982,7 @@ bool InstCombinerImpl::run() { if (OptBB) { auto *UserParent = *OptBB; // Okay, the CFG is simple enough, try to sink this instruction. - if (TryToSinkInstruction(I, UserParent)) { + if (TryToSinkInstruction(I, UserParent, TLI)) { LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); MadeIRChange = true; // We'll add uses of the sunk instruction below, but since diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index 49f9e5ec2cb469..8d0259df0392c2 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -117,11 +117,11 @@ define i32 @sink_write_to_use(i1 %c) { ; CHECK-LABEL: @sink_write_to_use( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull writeonly [[VAR]]) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -140,11 +140,11 @@ define i32 @sink_readwrite_to_use(i1 %c) { ; CHECK-LABEL: @sink_readwrite_to_use( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -163,12 +163,12 @@ define i32 @sink_bitcast(i1 %c) { ; CHECK-LABEL: @sink_bitcast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i8, align 8 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i8* [[VAR]] to i32* -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[BITCAST]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i8* [[VAR]] to i32* +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[BITCAST]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -189,12 +189,12 @@ define i32 @sink_gep1(i1 %c) { ; CHECK-LABEL: @sink_gep1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR1:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 1 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[GEP]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 1 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[GEP]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -215,12 +215,12 @@ define i32 @sink_gep2(i1 %c) { ; CHECK-LABEL: @sink_gep2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR1:%.*]] = alloca [2 x i32], align 8 -; CHECK-NEXT: [[VAR1_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 0 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR1_SUB]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR1_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VAR1]], i64 0, i64 0 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR1_SUB]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -240,12 +240,12 @@ define i32 @sink_addrspacecast(i1 %c) { ; CHECK-LABEL: @sink_addrspacecast( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 8 -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast i32* [[VAR]] to i32 addrspace(2)* -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown.as2(i32 addrspace(2)* [[CAST]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast i32* [[VAR]] to i32 addrspace(2)* +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown.as2(i32 addrspace(2)* [[CAST]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: @@ -394,11 +394,11 @@ define i32 @sink_lifetime3(i1 %c) { ; CHECK-LABEL: @sink_lifetime3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] ; CHECK: early_return: ; CHECK-NEXT: ret i32 0 ; CHECK: use_block: +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1]] ; CHECK-NEXT: ret i32 [[VAR3]] ; entry: