diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 94f3e0d3464fdb..4e840f6c03a191 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,10 +40,12 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -853,6 +855,11 @@ struct DSEState { PostDominatorTree &PDT; const TargetLibraryInfo &TLI; const DataLayout &DL; + const LoopInfo &LI; + + // Whether the function contains any irreducible control flow, useful for + // being accurately able to detect loops. + bool ContainsIrreducibleLoops; // All MemoryDefs that potentially could kill other MemDefs. SmallVector MemDefs; @@ -876,14 +883,15 @@ struct DSEState { DenseMap IOLs; DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, - PostDominatorTree &PDT, const TargetLibraryInfo &TLI) + PostDominatorTree &PDT, const TargetLibraryInfo &TLI, + const LoopInfo &LI) : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()) {} + DL(F.getParent()->getDataLayout()), LI(LI) {} static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI) { - DSEState State(F, AA, MSSA, DT, PDT, TLI); + const TargetLibraryInfo &TLI, const LoopInfo &LI) { + DSEState State(F, AA, MSSA, DT, PDT, TLI, LI); // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; @@ -911,6 +919,9 @@ struct DSEState { State.InvisibleToCallerAfterRet.insert({&AI, true}); } + // Collect whether there is any irreducible control flow in the function. + State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI); + return State; } @@ -925,6 +936,12 @@ struct DSEState { isOverwrite(const Instruction *LaterI, const Instruction *EarlierI, const MemoryLocation &Later, const MemoryLocation &Earlier, int64_t &EarlierOff, int64_t &LaterOff) { + // AliasAnalysis does not always account for loops. Limit overwrite checks + // to dependencies for which we can guarantee they are independant of any + // loops they are in. + if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier)) + return OW_Unknown; + // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) { @@ -1250,11 +1267,33 @@ struct DSEState { return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); } + /// Returns true if a dependency between \p Current and \p KillingDef is + /// guaranteed to be loop invariant for the loops that they are in. Either + /// because they are known to be in the same block, in the same loop level or + /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation + /// during execution of the containing function. + bool isGuaranteedLoopIndependent(const Instruction *Current, + const Instruction *KillingDef, + const MemoryLocation &CurrentLoc) { + // If the dependency is within the same block or loop level (being careful + // of irreducible loops), we know that AA will return a valid result for the + // memory dependency. (Both at the function level, outside of any loop, + // would also be valid but we currently disable that to limit compile time). + if (Current->getParent() == KillingDef->getParent()) + return true; + const Loop *CurrentLI = LI.getLoopFor(Current->getParent()); + if (!ContainsIrreducibleLoops && CurrentLI && + CurrentLI == LI.getLoopFor(KillingDef->getParent())) + return true; + // Otherwise check the memory location is invariant to any loops. + return isGuaranteedLoopInvariant(CurrentLoc.Ptr); + } + /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible /// loop. In particular, this guarantees that it only references a single /// MemoryLocation during execution of the containing function. - bool IsGuaranteedLoopInvariant(Value *Ptr) { - auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) { + bool isGuaranteedLoopInvariant(const Value *Ptr) { + auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) { Ptr = Ptr->stripPointerCasts(); if (auto *I = dyn_cast(Ptr)) { if (isa(Ptr)) @@ -1398,9 +1437,9 @@ struct DSEState { // AliasAnalysis does not account for loops. Limit elimination to // candidates for which we can guarantee they always store to the same - // memory location and not multiple locations in a loop. - if (Current->getBlock() != KillingDef->getBlock() && - !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { + // memory location and not located in different loops. + if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) { + LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n"); StepAgain = true; Current = CurrentDef->getDefiningAccess(); WalkerStepLimit -= 1; @@ -1529,8 +1568,16 @@ struct DSEState { return None; } - // For the KillingDef and EarlierAccess we only have to check if it reads - // the memory location. + // If this worklist walks back to the original memory access (and the + // pointer is not guarenteed loop invariant) then we cannot assume that a + // store kills itself. + if (EarlierAccess == UseAccess && + !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) { + LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n"); + return None; + } + // Otherwise, for the KillingDef and EarlierAccess we only have to check + // if it reads the memory location. // TODO: It would probably be better to check for self-reads before // calling the function. if (KillingDef == UseAccess || EarlierAccess == UseAccess) { @@ -1550,16 +1597,18 @@ struct DSEState { // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { - if (!isInvisibleToCallerAfterRet(DefUO) && - UseAccess != EarlierAccess) { - BasicBlock *MaybeKillingBlock = UseInst->getParent(); - if (PostOrderNumbers.find(MaybeKillingBlock)->second < - PostOrderNumbers.find(EarlierAccess->getBlock())->second) { - + BasicBlock *MaybeKillingBlock = UseInst->getParent(); + if (PostOrderNumbers.find(MaybeKillingBlock)->second < + PostOrderNumbers.find(EarlierAccess->getBlock())->second) { + if (!isInvisibleToCallerAfterRet(DefUO)) { LLVM_DEBUG(dbgs() << " ... found killing def " << *UseInst << "\n"); KillingDefs.insert(UseInst); } + } else { + LLVM_DEBUG(dbgs() + << " ... found preceeding def " << *UseInst << "\n"); + return None; } } else PushMemUses(UseDef); @@ -1836,12 +1885,13 @@ struct DSEState { } }; -bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, - DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI) { +static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, + DominatorTree &DT, PostDominatorTree &PDT, + const TargetLibraryInfo &TLI, + const LoopInfo &LI) { bool MadeChange = false; - DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI); // For each store: for (unsigned I = 0; I < State.MemDefs.size(); I++) { MemoryDef *KillingDef = State.MemDefs[I]; @@ -2014,8 +2064,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); MemorySSA &MSSA = AM.getResult(F).getMSSA(); PostDominatorTree &PDT = AM.getResult(F); + LoopInfo &LI = AM.getResult(F); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2029,6 +2080,7 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); + PA.preserve(); return PA; } @@ -2054,8 +2106,9 @@ class DSELegacyPass : public FunctionPass { MemorySSA &MSSA = getAnalysis().getMSSA(); PostDominatorTree &PDT = getAnalysis().getPostDomTree(); + LoopInfo &LI = getAnalysis().getLoopInfo(); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2077,6 +2130,8 @@ class DSELegacyPass : public FunctionPass { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } }; @@ -2093,6 +2148,7 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, false) diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll index 021012aff54d2d..25051b885f9d35 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -517,8 +517,8 @@ ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: MemCpy Optimization -; GCN-O2-NEXT: Dead Store Elimination ; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Dead Store Elimination ; GCN-O2-NEXT: Canonicalize natural loops ; GCN-O2-NEXT: LCSSA Verifier ; GCN-O2-NEXT: Loop-Closed SSA Form Pass @@ -877,8 +877,8 @@ ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: MemCpy Optimization -; GCN-O3-NEXT: Dead Store Elimination ; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Dead Store Elimination ; GCN-O3-NEXT: Canonicalize natural loops ; GCN-O3-NEXT: LCSSA Verifier ; GCN-O3-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 36370d6cd6d39c..542a79820e4f20 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -162,8 +162,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index c39361dcc8a2df..4c5e126f229fad 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -167,8 +167,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index af77e09c249680..97b11fec08b9c9 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -167,8 +167,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index e24d3d3f57bfc3..b65e550ce9ebe6 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -148,8 +148,8 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: MemCpy Optimization -; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Dead Store Elimination ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll index 7bcb1ad79f0b01..e555e04f51d1c4 100644 --- a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll @@ -111,7 +111,6 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias ; CHECK: for.body4.lr.ph: ; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] -; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.body4: @@ -179,7 +178,6 @@ define i32 @test_if(i1 %c, i32* %p, i32 %i) { ; CHECK-NEXT: [[PH:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[BB3:%.*]] ] ; CHECK-NEXT: [[INC]] = add i32 [[PH]], 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i32 [[PH]] -; CHECK-NEXT: store i32 [[I:%.*]], i32* [[GEP]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB2:%.*]], label [[BB3]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] @@ -216,7 +214,6 @@ define i32 @test_if2(i1 %c, i32* %p, i32 %i) { ; CHECK-NEXT: [[PH:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[BB2:%.*]] ], [ [[INC]], [[BB3:%.*]] ] ; CHECK-NEXT: [[INC]] = add i32 [[PH]], 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i32 [[PH]] -; CHECK-NEXT: store i32 [[I:%.*]], i32* [[GEP]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB2]], label [[BB3]] ; CHECK: bb2: ; CHECK-NEXT: store i32 2, i32* [[GEP]], align 4